pull/6/head
injetlee 2016-12-09 14:22:52 +08:00
parent 04ecaaaa5e
commit 18419910fd
4 changed files with 58 additions and 40 deletions

View File

@ -2,9 +2,10 @@ import os
def shorten_names(root, limit=50):
    """Truncate long entry names inside each immediate subdirectory of *root*.

    Every entry found directly inside a subdirectory of *root* is renamed to
    the first *limit* characters of its current name. Entries that sit
    directly in *root* are left alone.

    An entry is skipped when its name is already short enough, or when the
    truncated name is already taken: os.rename() would silently replace the
    existing file on POSIX, losing data.
    """
    for entry in os.listdir(root):
        subdir = os.path.join(root, entry)
        if not os.path.isdir(subdir):
            continue
        for old_name in os.listdir(subdir):
            new_name = old_name[:limit]
            if new_name == old_name:
                continue  # nothing to truncate
            target = os.path.join(subdir, new_name)
            if os.path.exists(target):
                # Two long names share the same prefix; keep both files.
                print('skipped %s: %s already exists' % (old_name, new_name))
                continue
            os.rename(os.path.join(subdir, old_name), target)


if __name__ == '__main__':
    shorten_names(os.getcwd())

14
biyingSpider.py 100644
View File

@ -0,0 +1,14 @@
import requests
import re
import time
BING_URL = 'http://cn.bing.com/'
# The dots are escaped so they only match literal dots, and \S keeps the
# match inside a single whitespace-free token instead of spanning the page.
IMAGE_URL_PATTERN = re.compile(
    r"(http://s\.cn\.bing\.net/az/hprichbg/rb/\S*?\.jpg)")


def extract_image_url(html):
    """Return the first wallpaper URL found in *html*, or None if absent."""
    match = IMAGE_URL_PATTERN.search(html)
    return match.group(1) if match else None


def main():
    """Download today's Bing wallpaper into '<YYYY.MM.DD>.jpg' in the cwd."""
    page = requests.get(BING_URL, timeout=30)
    page.raise_for_status()
    image_url = extract_image_url(page.text)
    if image_url is None:
        # Exit with a readable message instead of an IndexError traceback.
        raise SystemExit('no wallpaper URL found on %s' % BING_URL)
    print(image_url)
    image = requests.get(image_url, timeout=30)
    image.raise_for_status()
    filename = '%s.jpg' % time.strftime("%Y.%m.%d")
    # The context manager closes the file even if the write fails.
    with open(filename, 'wb') as out:
        out.write(image.content)


if __name__ == '__main__':
    main()

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
# encoding=utf-8 # encoding=utf-8
import requests,re import requests
import re
import codecs import codecs
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from openpyxl import Workbook from openpyxl import Workbook
@ -24,56 +25,58 @@ def download_page(url):
def get_li(doc):
    """Parse one listing page and collect per-movie fields.

    Returns a 5-tuple: (titles, vote-count strings, scores, short quotes,
    next-page URL). The four lists are parallel, one entry per <li> inside
    the 'grid_view' <ol>. The quote is '' when an item has no 'inq' span.
    The last element is None when the 'next' span contains no link.
    """
    soup = BeautifulSoup(doc, 'html.parser')
    grid = soup.find('ol', class_='grid_view')
    name = []       # movie titles
    star_con = []   # number-of-ratings text
    score = []      # rating scores
    info_list = []  # short quotes

    for item in grid.find_all('li'):
        header = item.find('div', attrs={'class': 'hd'})
        title = header.find('span', attrs={'class': 'title'}).get_text()
        rating = item.find('span', attrs={'class': 'rating_num'}).get_text()
        rating_box = item.find('div', attrs={'class': 'star'})
        # First text node in the rating box that contains '评价'.
        votes = rating_box.find(text=re.compile('评价'))
        quote = item.find('span', attrs={'class': 'inq'})

        # Not every movie has a short quote; keep the lists aligned.
        info_list.append(quote.get_text() if quote else '')
        score.append(rating)
        name.append(title)
        star_con.append(votes)

    # Link to the following page, if there is one.
    next_link = soup.find('span', attrs={'class': 'next'}).find('a')
    next_url = DOWNLOAD_URL + next_link['href'] if next_link else None
    return name, star_con, score, info_list, next_url
def write_rows(sheet, names, star_counts, scores, quotes):
    """Write the parallel lists into columns A-D of *sheet*, one row per item.

    Rows are numbered from 1 by position. Looking the row up with
    names.index() would send every repeated title to the row of its first
    occurrence, and would rescan the list for each item.
    """
    records = zip(names, star_counts, scores, quotes)
    for row, (title, votes, rating, quote) in enumerate(records, start=1):
        sheet['A%s' % row] = title
        sheet['B%s' % row] = votes
        sheet['C%s' % row] = rating
        sheet['D%s' % row] = quote


def main():
    """Crawl every page starting at DOWNLOAD_URL and save the rows to disk.

    Follows the next-page URL returned by get_li() until it is None, then
    writes the collected rows to the module-level sheet ws1 and saves the
    workbook wb under dest_filename.
    """
    url = DOWNLOAD_URL
    name = []
    star_con = []
    score = []
    info = []
    while url:
        doc = download_page(url)
        movie, star, level_num, info_list, url = get_li(doc)
        name.extend(movie)
        star_con.extend(star)
        score.extend(level_num)
        info.extend(info_list)
    write_rows(ws1, name, star_con, score, info)
    wb.save(filename=dest_filename)


if __name__ == '__main__':
    main()

View File

@ -3,15 +3,15 @@ from openpyxl.compat import range
from openpyxl.cell import get_column_letter from openpyxl.cell import get_column_letter
# Build a three-sheet demo workbook and write it to dest_filename.
dest_filename = 'empty_book2.xlsx'
wb = Workbook()

# Sheet 1: the workbook's active sheet, renamed.
ws1 = wb.active
ws1.title = "range names"
# Append range(60) as a new row, 39 times over.
for row in range(1, 40):
    ws1.append(range(60))

# Sheet 2: a single value in cell F5.
ws2 = wb.create_sheet(title="Pi")
ws2['F5'] = 3.14

# Sheet 3: for rows 10-19 and columns 27-53, each cell holds the letter
# name of its own column.
ws3 = wb.create_sheet(title="Data")
for row in range(10, 20):
    for col in range(27, 54):
        _ = ws3.cell(
            column=col,
            row=row,
            value="%s" % get_column_letter(col),
        )

wb.save(filename=dest_filename)