1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
| # -*- coding: utf-8 -*- import sqlite3 import urllib import urllib2 import os import re import sys import json import smtplib import pytesseract import time from email.mime.text import MIMEText from email.header import Header from PIL import Image
# Third-party SMTP service settings (fill these in before running).
mail_host = ""  # SMTP server host
mail_user = ""  # login user name
mail_pass = ""  # login password
sender = ""  # sending address, usually the same as the user name

# Addresses that receive the notification mail; this can be a QQ mailbox
# or any other mailbox.
receivers = ['[email protected]']
# Room-list endpoint; the URL-quoted search keyword is appended to it.
BaseURL = 'http://m.ziroom.com/v7/room/list.json?city_code=110000&type=11&keywords='

# keywords = '%E9%BE%99%E5%8D%8E%E5%9B%AD'  (percent-encoded UTF-8 of 龙华园)
# Search keywords; each one is searched in turn.
keywords = ['华清嘉园', '展春园']

# Page counter read and advanced through ``global`` while paginating.
cur_page = 1

# Accumulates one line per newly seen room until the mail is sent.
room_string = ''
def sendEmail(content, keyword):
    """Email a plain-text notification about newly listed rooms.

    Builds a UTF-8 ``text/plain`` message whose subject contains *keyword*
    and sends it from the module-level ``sender`` to every address in the
    module-level ``receivers``, connecting to ``mail_host`` on port 25 and
    logging in with ``mail_user`` / ``mail_pass``.

    Args:
        content: Message body.
        keyword: Search keyword, interpolated into the subject line.

    Returns:
        None. Any ``smtplib.SMTPException`` is printed instead of raised;
        other errors (for example a socket error while connecting) still
        propagate to the caller.
    """
    message = MIMEText(content, 'plain', 'utf-8')
    message['From'] = '自如<' + sender + '>'
    message['To'] = 'lefo<[email protected]>'
    subject = '有' + keyword + '的新房子了'
    message['Subject'] = Header(subject, 'utf-8')

    try:
        smtpObj = smtplib.SMTP()
        smtpObj.connect(mail_host, 25)  # 25 is the plain SMTP port
        try:
            smtpObj.login(mail_user, mail_pass)
            smtpObj.sendmail(sender, receivers, message.as_string())
        finally:
            # Always end the session so the socket is not leaked, whether
            # or not login/sendmail succeeded.
            try:
                smtpObj.quit()
            except smtplib.SMTPException:
                # The server is already gone; just drop the local socket.
                smtpObj.close()
        print("邮件发送成功")
    except smtplib.SMTPException as e:
        print(e)
def _fetch_rooms(URL, page):
    """Download one result page and return the list at data -> rooms."""
    send_headers = {
        'Host': 'm.ziroom.com',
        'User-Agent': ('Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) '
                       'AppleWebKit/604.1.38 (KHTML, like Gecko) '
                       'Version/11.0 Mobile/15A372 Safari/604.1'),
        'Accept': 'application/json;version=6',
    }
    url = URL + '&page=' + page
    req = urllib2.Request(url, headers=send_headers)

    response = urllib2.urlopen(req)
    try:
        content = response.read().decode('utf-8')
    finally:
        # Release the HTTP connection even if reading/decoding fails.
        response.close()

    data = json.loads(content)
    rooms = data['data']['rooms']
    print(URL)
    print(content)
    return rooms


def _read_price(room):
    """Return the room's price as text read from its price image via OCR.

    ``room['price']`` is used as a pair: element 0 is the scheme-less URL of
    an image, element 1 is a sequence of character positions into the OCR
    output that spell the price. Returns '' when OCR yields nothing or
    fewer characters than the positions require.
    """
    pricedata = room['price']
    priceurl = 'http:' + pricedata[0]
    print(priceurl)

    # Images are cached on disk under image/, keyed by their file name.
    path = "image/" + os.path.basename(pricedata[0])
    if not os.path.exists(path):
        image_dir = os.path.dirname(path)
        if not os.path.isdir(image_dir):
            # First run: the cache directory does not exist yet.
            os.makedirs(image_dir)
        res = urllib.urlopen(priceurl).read()
        with open(path, "wb") as f:
            f.write(res)

    img = Image.open(path)
    print(path)
    imgprice = pytesseract.image_to_string(img, lang='eng', config='-psm 7')
    print(imgprice)

    if len(imgprice) == 0:
        return ''
    try:
        return ''.join([imgprice[index] for index in pricedata[1]])
    except IndexError:
        # OCR recognised fewer characters than expected; report no price
        # rather than aborting the whole run.
        return ''


def getJson(URL, page, keyword):
    """Collect rooms not seen before for *keyword* and email them.

    Fetches ``URL + '&page=' + page``; every room whose id is not yet in the
    ROOM table is appended to the module-level ``room_string`` and inserted
    into the table. While a page returns rooms, the module-level
    ``cur_page`` is incremented and the next page is requested from
    ``BaseURL`` plus the URL-quoted keyword. When a page comes back empty,
    any accumulated ``room_string`` is sent with ``sendEmail`` and reset.

    Args:
        URL: Search URL without the page parameter (used for the first page).
        page: Page number of the first request, as a string.
        keyword: Raw search keyword; used for follow-up pages and the mail
            subject.
    """
    global cur_page
    global room_string

    while True:
        rooms = _fetch_rooms(URL, page)

        for room in rooms:
            # Bound parameters: the values come from a remote service and
            # must not be spliced into the SQL text.
            cursor = c.execute("SELECT 1 FROM ROOM WHERE ZID=? LIMIT 1",
                               (room['id'],))
            if cursor.fetchone() is not None:
                continue  # already recorded on an earlier run

            price = _read_price(room)
            unit = room['price_unit']
            roomInfo = room['id'] + ' ' + room['name'] + ' ' + price + unit
            room_string = room_string + roomInfo + '\n'

            c.execute('INSERT INTO ROOM (ZID,HID,TITLE) VALUES (?,?,?)',
                      (room['id'], room['house_id'], room['name']))
            conn.commit()

        if not rooms:
            break

        # Same request the recursive version issued for the next page.
        cur_page += 1
        URL = BaseURL + urllib.quote(keyword)
        page = str(cur_page)

    if len(room_string) > 0:
        print('send email \n' + room_string)
        sendEmail(room_string, keyword)
        room_string = ''


# Database of rooms already seen; opened at module load and used by getJson
# through the module-level names ``conn`` and ``c``.
conn = sqlite3.connect('ziroom.db')
c = conn.cursor()
c.execute('CREATE TABLE IF NOT EXISTS ROOM '
          '(ID INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, '
          'ZID TEXT NOT NULL, '
          'HID TEXT NOT NULL, '
          'TITLE CHAR(50));')
# Run one complete paginated search for every configured keyword.
for keyword in keywords:
    # Pagination restarts at the first page for each keyword.
    cur_page = 1
    # The keyword has to be URL-quoted before it is appended to the query.
    keywordsquote = urllib.quote(keyword)
    getJson(BaseURL + keywordsquote, str(cur_page), keyword)
|