#!/usr/bin/env python
"""Extract restaurant records from downloaded mobiticket WML pages.

Scans every ``*.wml`` file in ``download/mobiticket/rest-pages`` (relative to
this script) and prints one tab-separated row per page to stdout, preceded by
a header line.  Address, telephone, latitude and longitude fall back to the
literal string ``NULL`` when they cannot be extracted; the Province and Food
columns are always ``NULL``.  Pages without a ``Nombre:`` line are skipped.
"""

import glob
import os
import re

# Glob pattern matching the downloaded restaurant pages.
path = os.path.join(os.path.dirname(__file__),
                    'download', 'mobiticket', 'rest-pages', '*.wml')

# Captures the rest of the line following "Nombre:".
re_addrline = re.compile(r"Nombre:(.*)")
# NOTE(review): as received, this pattern ends in a literal line break.  A
# markup tag (possibly a row separator) may have been lost from the file;
# confirm against a sample page.  As written it only matches text that is
# followed by a newline, which the single-line capture of re_addrline never
# contains.
re_rows = re.compile("(.*?)\n")
# Captures the text between "Llamar" and the next "<".
re_phone = re.compile(r"Llamar(.*?)<")
# Captures the two comma-separated values of the "center=" query parameter
# of a maps.google.com URL.
re_latlon = re.compile(r"maps\.google\.com.*center=(.*),(.*)&maptype")


def remove_tabs(str):
    """Return *str* with every tab character replaced by a single space.

    The parameter name shadows the builtin; it is kept unchanged so existing
    keyword callers keep working.
    """
    # The original called a bare ``replace`` function that was never imported;
    # the string method does the same job without any extra dependency.
    return str.replace("\t", " ")


print("#Id\tRestaurant\tAddress\tCP\tTown\tProvince\tTelephone\tFood\tLatitude\tLongitude")

for infile in glob.glob(path):
    # File name up to its first dot; basename() handles the platform's own
    # path separator instead of assuming "/".
    page_id = os.path.basename(infile).split(".")[0]

    # Context manager closes the handle even if reading fails.
    with open(infile, 'r') as f:
        wml = f.read()

    try:
        addrline = re_addrline.findall(wml)[0]
    except IndexError:
        # No "Nombre:" line in this page: nothing to report.
        continue

    rows = re_rows.findall(addrline)
    name = rows[0].strip()

    try:
        # Second row is expected to look like "<label>: <address>".
        addr = rows[1].split(":")[1].strip()
    except IndexError:
        # Missing row or missing colon.
        addr = "NULL"

    # Third row is expected to look like "<town> (<postcode>)".
    town_and_cp = rows[2].split("(")
    town = town_and_cp[0].strip()
    cp = town_and_cp[1].strip()[0:-1]  # drop the trailing ")"

    try:
        phone = re_phone.findall(wml)[0].strip()
    except IndexError:
        phone = "NULL"

    try:
        lat, lon = re_latlon.findall(wml)[0]
    except IndexError:
        lat = "NULL"
        lon = "NULL"

    # Province and Food are not extracted from the page.
    result = [page_id, name, addr, cp, town, "NULL", phone, "NULL", lat, lon]
    print("\t".join(result))