pull/6/head
injetlee 2016-12-09 14:22:52 +08:00
parent 04ecaaaa5e
commit 18419910fd
4 changed files with 58 additions and 40 deletions

View File

@ -7,4 +7,5 @@ for i in subdir:
end_dir = os.listdir(path)
for i in range(len(end_dir)):
newname = end_dir[i][0:50]
os.rename(os.path.join(path,end_dir[i]),os.path.join(path,newname))
os.rename(os.path.join(path, end_dir[
i]), os.path.join(path, newname))

14
biyingSpider.py 100644
View File

@ -0,0 +1,14 @@
import requests
import re
import time
# Download the image referenced on the Bing China home page and save it in
# the current directory as "<YYYY.MM.DD>.jpg".

# Date stamp used as the output file name, e.g. "2016.12.09".
local = time.strftime("%Y.%m.%d")
url = 'http://cn.bing.com/'
con = requests.get(url)
# Fail fast on an HTTP error instead of searching an error page for the URL.
con.raise_for_status()
content = con.text
# Dots are escaped so they match a literal "." in the host name and the
# extension rather than any character.
reg = r"(http://s\.cn\.bing\.net/az/hprichbg/rb/.*?\.jpg)"
matches = re.findall(reg, content, re.S)
if not matches:
    # Give a readable message instead of a bare IndexError when the page
    # markup no longer contains a matching image URL.
    raise SystemExit('no image URL matching %r found at %s' % (reg, url))
a = matches[0]
print(a)
read = requests.get(a)
# Do not write an HTTP error body to disk as if it were the image.
read.raise_for_status()
# The context manager closes the file even if the write raises.
with open('%s.jpg' % local, 'wb') as f:
    f.write(read.content)

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python
# encoding=utf-8
import requests,re
import requests
import re
import codecs
from bs4 import BeautifulSoup
from openpyxl import Workbook
@ -30,8 +31,10 @@ def get_li(doc):
info_list = [] # 短评
for i in ol.find_all('li'):
detail = i.find('div', attrs={'class': 'hd'})
movie_name = detail.find('span', attrs={'class': 'title'}).get_text() #电影名字
level_star = i.find('span',attrs={'class':'rating_num'}).get_text() #评分
movie_name = detail.find(
'span', attrs={'class': 'title'}).get_text() # 电影名字
level_star = i.find(
'span', attrs={'class': 'rating_num'}).get_text() # 评分
star = i.find('div', attrs={'class': 'star'})
star_num = star.find(text=re.compile('评价')) # 评价
@ -42,7 +45,6 @@ def get_li(doc):
info_list.append('')
score.append(level_star)
name.append(movie_name)
star_con.append(star_num)
page = soup.find('span', attrs={'class': 'next'}).find('a') # 获取下一页
@ -75,5 +77,6 @@ def main():
ws1[col_D] = p
wb.save(filename=dest_filename)
if __name__ == '__main__':
main()