-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy path06-JDprice.py
More file actions
118 lines (95 loc) · 3.59 KB
/
06-JDprice.py
File metadata and controls
118 lines (95 loc) · 3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
'''
Simple crawler that fetches product prices from JD.com
'''
'''
Approach: go through JD's mobile store (because it does not hide the price in JS)
'''
import urllib.request
import re
import xlwt
class JDGoods(object):
    """Plain record holding the scraped data for one JD.com product.

    All three fields default to the empty string, so a field that could not
    be scraped is simply left as ''.

    Attributes:
        jdid: Product id (stored as text by ``get_jd_price``).
        jdname: Product title.
        jdprice: Price text as it appears on the page.
    """

    # Class-level defaults are kept so that ``JDGoods.jdid`` and friends still
    # resolve for code that reads them without creating an instance.
    jdid = ''
    jdname = ''
    jdprice = ''

    def __init__(self, jdid='', jdname='', jdprice=''):
        """Create a record; every field is optional and defaults to ''."""
        super(JDGoods, self).__init__()
        self.jdid = jdid
        self.jdname = jdname
        self.jdprice = jdprice

    def __repr__(self):
        """Return a debugging representation listing all three fields."""
        return '%s(jdid=%r, jdname=%r, jdprice=%r)' % (
            type(self).__name__, self.jdid, self.jdname, self.jdprice)
def _fetch_html(url):
    """Download *url* and return the response body decoded as UTF-8."""
    # The context manager closes the HTTP response even when reading or
    # decoding fails, so connections are not leaked across many lookups.
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def get_jd_price(id):
    """Scrape one product's name and price through JD's mobile site.

    The product page is requested from the mobile store (m.jd.com). If that
    page carries a ``returnUrl=...`` redirect target, the target is fetched
    as well and parsed instead. The id, name and price are printed and also
    returned.

    Args:
        id: Product id (int or digit string). The parameter name shadows the
            builtin ``id``; it is kept so existing callers keep working.

    Returns:
        A ``JDGoods`` whose ``jdid`` is the id as text; ``jdname`` and
        ``jdprice`` stay '' when the page does not match the expected markup.

    Raises:
        AttributeError: if the URL built from *id* does not contain a
            ``/<digits>.html`` segment.
        Whatever ``urllib.request.urlopen`` or the UTF-8 decode raise for the
        first page request; only the follow-up redirect is best-effort.
    """
    from http.client import HTTPException

    jdGoods = JDGoods()
    # Build the desktop URL, then pull the numeric id back out of it.
    url = 'http://item.jd.com/' + str(id) + '.html'
    jdGoods.jdid = re.search(r'/(\d+)\.html', url).group(1)
    # Switch to the mobile store URL for the same product.
    url = 'http://m.jd.com/product/' + jdGoods.jdid + '.html'
    html = _fetch_html(url)

    # Follow the redirect target when the page announces one.
    redirect = re.search(r'returnUrl=(.*)\"', html)
    if redirect:
        try:
            html = _fetch_html(redirect.group(1))
        except (OSError, ValueError, HTTPException):
            # Best effort: on a network, URL or decoding problem keep parsing
            # the first page instead of aborting the lookup.
            pass

    # Product name.
    name_match = re.search(
        r'<span class="title-text">(.*?)<i.*?/i></span>', html, re.S)
    if name_match:
        jdGoods.jdname = name_match.group(1)

    # Product price; re.S lets '.' match newlines too. The price is the
    # non-whitespace text captured right after the closing </span>.
    price_match = re.search(
        r'<div class="prod-price">.*?<span>(.*?)</span>([^\s]*).*</div>',
        html, re.S)
    if price_match:
        jdGoods.jdprice = price_match.group(2)

    print('商品ID:%s' % jdGoods.jdid)
    print('商品名称:%s' % jdGoods.jdname)
    print('商品价格:%s' % jdGoods.jdprice)
    return jdGoods
# Demo: look up one product, then the nine ids 1119453..1119461.
# NOTE(review): the guard is `!=`, so this runs when the module is imported and
# is skipped when the file is executed as a script -- presumably flipped on
# purpose to disable the demo; confirm before changing it.
if __name__ != '__main__':
    # An underscore-prefixed loop name avoids rebinding the builtin `id` at
    # module level and keeps the name out of `from module import *`.
    for _goods_id in [1119429] + list(range(1119453, 1119462)):
        get_jd_price(_goods_id)
'''
Sample output:
商品ID:1119429
商品名称:丹姿水密码冰川矿泉洁肤晶露100g(洗面奶 深层清洁 温和保湿)
商品价格:9.90
商品ID:1119453
商品名称:惠普(HP) CN053AA 932XL 超大号黑彩墨盒套装墨盒 (含1支黑,3支彩,购买时彩色显示为附件)
商品价格:577.00
商品ID:1119454
商品名称:嘉速(Jiasu) 尼康D3300 单反相机专用 高透防刮屏幕保护膜/贴膜
商品价格:18.90
商品ID:1119455
商品名称:麦富迪宠物零食 纯天然鸡胸肉卷牛皮狗咬胶200g*2袋
商品价格:69.00
商品ID:1119456
商品名称:麦富迪 狗粮 金毛专用成犬粮10kg
商品价格:
'''
'''
Save the price results to Excel
'''
def get_jd_price_list(a, b):
    """Return the scraped goods for every product id in ``range(a, b)``.

    Args:
        a: First product id (inclusive).
        b: Stop product id (exclusive).

    Returns:
        A list with one ``get_jd_price`` result per id, in ascending id
        order; empty when ``a >= b``.
    """
    return [get_jd_price(goods_id) for goods_id in range(a, b)]
def get_jd_price_excel(a,b):
    """Scrape the product ids in ``range(a, b)`` and save them as an .xls file.

    The workbook has a header row (number, jdid, jdname, jdprice) followed by
    one row per product, numbered from 1. It is written to
    ``result//06-JDprice//jdprice<a>-<b>.xls`` relative to the current
    working directory.

    Args:
        a: First product id (inclusive).
        b: Stop product id (exclusive).
    """
    import os

    wbk = xlwt.Workbook(encoding='utf-8')
    sheet = wbk.add_sheet('sheet 1', cell_overwrite_ok=True)

    # Header row.
    for col, title in enumerate(('number', 'jdid', 'jdname', 'jdprice')):
        sheet.write(0, col, title)

    # One data row per scraped product; the row index doubles as the running
    # number because row 0 is the header.
    for row, goods in enumerate(get_jd_price_list(a, b), start=1):
        sheet.write(row, 0, row)
        sheet.write(row, 1, goods.jdid)
        sheet.write(row, 2, goods.jdname)
        sheet.write(row, 3, goods.jdprice)

    out_path = 'result//06-JDprice//jdprice' + str(a) + '-' + str(b) + '.xls'
    # Create the output directory first so a missing folder does not throw
    # away the scraped results at save time.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    wbk.save(out_path)
    print('生成 Excel 成功!')
# Script entry point: export the products with ids 1119450..1119549 to Excel.
if __name__ == '__main__':
    get_jd_price_excel(a=1119450, b=1119550)