import re
import urllib.request
from json import JSONDecoder


def extract_product_literal(page_text):
    """Return the JavaScript object literal that follows ``product:``.

    The literal is taken from just after the ``product:`` marker up to (but
    not including) the next ``};``.  The search for ``};`` starts *after* the
    marker, so an unrelated ``};`` earlier in the page cannot produce an
    empty or truncated slice.

    Returns:
        The stripped literal text, or ``None`` when either the marker or the
        terminating ``};`` is missing.
    """
    marker = 'product:'
    start = page_text.find(marker)
    if start < 0:
        return None
    start += len(marker)
    end = page_text.find('};', start)
    if end < 0:
        # Without a terminator a slice ending at -1 would silently drop the
        # last character; treat the page as not containing product data.
        return None
    return page_text[start:end].strip()


def to_strict_json(js_literal):
    """Convert a loosely quoted JavaScript object literal to strict JSON text.

    JSON requires double quotes around property names and string values, so:

    1. every bare ``name:`` is wrapped in single quotes,
    2. every single quote is turned into a double quote,
    3. the ``http`` of a URL, which step 1 also quoted because it is followed
       by a colon, is restored.
    """
    single_quoted = re.sub(r"(,?)(\w+?)\s*?:", r"\1'\2':", js_literal)
    double_quoted = single_quoted.replace("'", "\"")
    # NOTE(review): only the "http" scheme is repaired here; any other
    # word-followed-by-colon inside a string value would stay mis-quoted.
    return double_quoted.replace("\"http\"", "http")


def main():
    """Fetch a range of JD item pages and print the product data of each."""
    # The numbers are JD (jd.com) product ids.
    for i in range(11348876, 11348999):
        url = 'http://item.jd.com/%s.html' % (i)
        # NOTE: urlopen raises on HTTP/network errors, which stops the scan.
        with urllib.request.urlopen(url) as response:
            page = response.read()
        page_text = page.decode("GBK")

        literal = extract_product_literal(page_text)
        if literal is None:
            continue

        json_text = to_strict_json(literal)
        print(json_text)
        text = JSONDecoder().decode(json_text)  # parse with the json module
        print(text)
        print("%s,%s,%s,%s,%s" % (text['skuid'], text['wMaprice'], text['name'], text['href'], text['jqimg']))


if __name__ == "__main__":
    main()
記錄幾個知識點:
1:
ValueError: Expecting property name: line 1 column 1 (char 1)
這種類型的錯誤,是因爲 JSON 的標準語法不支持單引號,
屬性名或者屬性值,都必須用雙引號括起來。
因此,可以用類似於下面的代碼:
addedSingleQuoteJsonStr = re.sub(r"(,?)(\w+?)\s*?:", r"\1'\2':", orginalJsonStr);
doubleQuotedJsonStr = addedSingleQuoteJsonStr.replace("'", "\"");
先給屬性名添加單引號;
再把所有的單引號替換成雙引號。