Python抓取JSON网页内容

# -*- coding: UTF-8 -*-
'''
Created on 20150206

@author: Hansen
'''

import urllib2
import sys
import io
import json

#Fetch HTML from URL
def fecth_html(index,url,keepHtml,resultFile):
    req = urllib2.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')
    rsp = urllib2.urlopen(req)
    content = rsp.read()
    #receive_header = rsp.info()
    #print(sys.getfilesystemencoding())
    #content = content.decode('utf-8','replace')
    
    if keepHtml:
        fileinfo = open(str(index)+'.html','w')
        fileinfo.write(content)
        print("save file "+ str(index)+'.html:    ok')
        
    parse_json(content,resultFile)
    
#Parse HTML
def parse_json(content,resultFile):
    jsonData = json.loads(content)
    shops = jsonData['shopBeans']
    print(len(shops))
    for shop in shops:
        szTitle = shop['filterFullName']
        szTitle = szTitle.replace("\r\n", "-").replace(" ","");
        szStar = shop['shopPowerTitle']
        szMeanPrice = str(shop['avgPrice'])
        szMeanPrice = szMeanPrice.replace("\n", "").replace(" ","");
        szAddressA = shop['mainRegionName']
        szAddressB = shop['address']
        szAddress = (szAddressA+"-"+szAddressB).replace("\r\n", "-").replace(" ","");
        szTaste = shop['refinedScore1']
        szEvn = shop['refinedScore2']
        szService = shop['refinedScore3']
        
        fileinfo = io.open(resultFile,'a',encoding='utf_16')
        fileinfo.write(szTitle+","+szStar+","+szMeanPrice+","+szAddress+"," +szTaste+","+szEvn+","+szService+"\n")

Comments are closed.