From 18419910fd97f22e319db27428a900340c8a5232 Mon Sep 17 00:00:00 2001
From: injetlee
Date: Fri, 9 Dec 2016 14:22:52 +0800
Subject: [PATCH] 12.9

---
 ModifyFilename.py | 13 +++++++------
 biyingSpider.py   | 14 ++++++++++++++
 douban_movie.py   | 55 +++++++++++++++++++++++++++++--------------------------
 readExcel.py      | 16 ++++++++--------
 4 files changed, 58 insertions(+), 40 deletions(-)
 create mode 100644 biyingSpider.py

diff --git a/ModifyFilename.py b/ModifyFilename.py
index c5b9627..466782f 100644
--- a/ModifyFilename.py
+++ b/ModifyFilename.py
@@ -2,9 +2,10 @@ import os
 dir = os.getcwd()
 subdir = os.listdir(dir)
 for i in subdir:
-    path = os.path.join(dir,i)
-    if os.path.isdir(path):
-        end_dir = os.listdir(path)
-        for i in range(len(end_dir)):
-            newname = end_dir[i][0:50]
-            os.rename(os.path.join(path,end_dir[i]),os.path.join(path,newname))
+    path = os.path.join(dir, i)
+    if os.path.isdir(path):
+        end_dir = os.listdir(path)
+        for i in range(len(end_dir)):
+            newname = end_dir[i][0:50]
+            os.rename(os.path.join(path, end_dir[
+                i]), os.path.join(path, newname))
diff --git a/biyingSpider.py b/biyingSpider.py
new file mode 100644
index 0000000..86bf645
--- /dev/null
+++ b/biyingSpider.py
@@ -0,0 +1,14 @@
+import requests
+import re
+import time
+local = time.strftime("%Y.%m.%d")
+url = 'http://cn.bing.com/'
+con = requests.get(url)
+content = con.text
+reg = r"(http://s.cn.bing.net/az/hprichbg/rb/.*?.jpg)"
+a = re.findall(reg, content, re.S)[0]
+print(a)
+read = requests.get(a)
+f = open('%s.jpg' % local, 'wb')
+f.write(read.content)
+f.close()
diff --git a/douban_movie.py b/douban_movie.py
index 0e4109a..887c141 100644
--- a/douban_movie.py
+++ b/douban_movie.py
@@ -1,12 +1,13 @@
 #!/usr/bin/env python
 # encoding=utf-8
-import requests,re
+import requests
+import re
 import codecs
 from bs4 import BeautifulSoup
 from openpyxl import Workbook
 wb = Workbook()
 dest_filename = '电影.xlsx'
-ws1 = wb.active 
+ws1 = wb.active
 ws1.title = "电影top250"
 
 DOWNLOAD_URL = 'http://movie.douban.com/top250/'
@@ -24,56 +25,58 @@ def download_page(url):
 
 def get_li(doc):
     soup = BeautifulSoup(doc, 'html.parser')
     ol = soup.find('ol', class_='grid_view')
-    name = [] #movie names
-    star_con = [] #number of raters
-    score = [] #ratings
-    info_list = [] #short reviews
+    name = []  # movie names
+    star_con = []  # number of raters
+    score = []  # ratings
+    info_list = []  # short reviews
     for i in ol.find_all('li'):
         detail = i.find('div', attrs={'class': 'hd'})
-        movie_name = detail.find('span', attrs={'class': 'title'}).get_text() #movie title
-        level_star = i.find('span',attrs={'class':'rating_num'}).get_text() #rating
-        star = i.find('div',attrs={'class':'star'})
-        star_num = star.find(text=re.compile('评价')) #rating count
+        movie_name = detail.find(
+            'span', attrs={'class': 'title'}).get_text()  # movie title
+        level_star = i.find(
+            'span', attrs={'class': 'rating_num'}).get_text()  # rating
+        star = i.find('div', attrs={'class': 'star'})
+        star_num = star.find(text=re.compile('评价'))  # rating count
 
-        info = i.find('span',attrs={'class':'inq'}) #short review
-        if info: #check whether there is a short review
+        info = i.find('span', attrs={'class': 'inq'})  # short review
+        if info:  # check whether there is a short review
             info_list.append(info.get_text())
         else:
             info_list.append('无')
         score.append(level_star)
-
         name.append(movie_name)
         star_con.append(star_num)
-    page = soup.find('span', attrs={'class': 'next'}).find('a') #get the next page
+    page = soup.find('span', attrs={'class': 'next'}).find('a')  # get the next page
     if page:
-        return name,star_con,score,info_list,DOWNLOAD_URL + page['href']
-    return name,star_con,score,info_list,None
+        return name, star_con, score, info_list, DOWNLOAD_URL + page['href']
+    return name, star_con, score, info_list, None
 
 
 def main():
     url = DOWNLOAD_URL
     name = []
-    star_con=[]
+    star_con = []
     score = []
     info = []
     while url:
         doc = download_page(url)
-        movie,star,level_num,info_list,url = get_li(doc)
+        movie, star, level_num, info_list, url = get_li(doc)
         name = name + movie
         star_con = star_con + star
-        score = score+level_num
-        info = info+ info_list
-    for (i,m,o,p) in zip(name,star_con,score,info):
-        col_A = 'A%s'%(name.index(i)+1)
-        col_B = 'B%s'%(name.index(i)+1)
-        col_C = 'C%s'%(name.index(i)+1)
-        col_D = 'D%s'%(name.index(i)+1)
-        ws1[col_A]=i
+        score = score + level_num
+        info = info + info_list
+    for (i, m, o, p) in zip(name, star_con, score, info):
+        col_A = 'A%s' % (name.index(i) + 1)
+        col_B = 'B%s' % (name.index(i) + 1)
+        col_C = 'C%s' % (name.index(i) + 1)
+        col_D = 'D%s' % (name.index(i) + 1)
+        ws1[col_A] = i
         ws1[col_B] = m
         ws1[col_C] = o
         ws1[col_D] = p
     wb.save(filename=dest_filename)
 
+
 if __name__ == '__main__':
     main()
diff --git a/readExcel.py b/readExcel.py
index 8871cbe..147d243 100644
--- a/readExcel.py
+++ b/readExcel.py
@@ -3,15 +3,15 @@ from openpyxl.compat import range
 from openpyxl.cell import get_column_letter
 wb = Workbook()
 dest_filename = 'empty_book2.xlsx'
-ws1 = wb.active #first sheet
-ws1.title = "range names" #name the first sheet
-#append a row of the numbers 0-59 to the first sheet, 39 times
-for row in range(1,40):
+ws1 = wb.active  # first sheet
+ws1.title = "range names"  # name the first sheet
+# append a row of the numbers 0-59 to the first sheet, 39 times
+for row in range(1, 40):
     ws1.append(range(60))
 ws2 = wb.create_sheet(title="Pi")
 ws2['F5'] = 3.14
 ws3 = wb.create_sheet(title="Data")
-for row in range(10,20):
-    for col in range(27,54):
-        _=ws3.cell(column=col,row=row,value="%s" % get_column_letter(col))
-wb.save(filename=dest_filename)
\ No newline at end of file
+for row in range(10, 20):
+    for col in range(27, 54):
+        _ = ws3.cell(column=col, row=row, value="%s" % get_column_letter(col))
+wb.save(filename=dest_filename)
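
Review note (not part of the patch): the new biyingSpider.py fetches the Bing China home page, pulls the first background-image URL out of the HTML with a regex, and saves the picture under today's date (e.g. 2016.12.09.jpg). As committed, re.findall(reg, content, re.S)[0] raises IndexError when the regex matches nothing, and the file handle is left open if the write fails. Below is a minimal hardened sketch that reuses the URL and regex from the patch; the timeouts, the explicit match check, and the with-block are illustrative additions, not something the commit contains.

# Sketch only -- not part of the commit above. The URL and regex are copied
# from biyingSpider.py; the timeouts and the checks are assumptions.
import re
import time

import requests

local = time.strftime("%Y.%m.%d")
url = 'http://cn.bing.com/'
reg = r"(http://s.cn.bing.net/az/hprichbg/rb/.*?.jpg)"

page = requests.get(url, timeout=10)
page.raise_for_status()  # stop on an HTTP error status

matches = re.findall(reg, page.text, re.S)
if not matches:
    raise SystemExit('no background image URL found in the page source')

image_url = matches[0]
print(image_url)

image = requests.get(image_url, timeout=10)
image.raise_for_status()

# the with-block closes the file even if the write raises
with open('%s.jpg' % local, 'wb') as f:
    f.write(image.content)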