mirror of https://github.com/injetlee/Python.git
12.9
parent
04ecaaaa5e
commit
18419910fd
|
@ -2,9 +2,10 @@ import os
|
||||||
# Shorten over-long file names: for every immediate sub-directory of the
# current working directory, cut each entry's name down to its first
# 50 characters.
root = os.getcwd()
for entry in os.listdir(root):
    path = os.path.join(root, entry)
    if not os.path.isdir(path):
        continue
    for old_name in os.listdir(path):
        new_name = old_name[0:50]
        if new_name == old_name:
            # Already short enough; nothing to rename.
            continue
        target = os.path.join(path, new_name)
        if os.path.exists(target):
            # Another entry already owns this 50-character prefix. Renaming
            # onto it would silently replace that file on POSIX systems, so
            # leave this one untouched and report it instead.
            print('skipped %s: %s already exists' % (old_name, new_name))
            continue
        os.rename(os.path.join(path, old_name), target)
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
import requests
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
# Download the background image referenced by the Bing (China) home page and
# save it in the current directory as "<YYYY.MM.DD>.jpg".
local = time.strftime("%Y.%m.%d")
url = 'http://cn.bing.com/'
con = requests.get(url)
content = con.text
# Dots are escaped so they match only literal dots in the host name and in
# the ".jpg" suffix; the lazy ".*?" stops at the first ".jpg".
reg = r"(http://s\.cn\.bing\.net/az/hprichbg/rb/.*?\.jpg)"
matches = re.findall(reg, content, re.S)
if not matches:
    # Fail with a readable message instead of an IndexError on [0].
    raise SystemExit('no wallpaper URL found on %s' % url)
a = matches[0]
print(a)
read = requests.get(a)
# Do not save an HTTP error page under a .jpg name.
read.raise_for_status()
# The context manager closes the file even if the write fails.
with open('%s.jpg' % local, 'wb') as f:
    f.write(read.content)
|
|
@ -1,12 +1,13 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# encoding=utf-8
|
# encoding=utf-8
|
||||||
import requests,re
|
import requests
|
||||||
|
import re
|
||||||
import codecs
|
import codecs
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from openpyxl import Workbook
|
from openpyxl import Workbook
|
||||||
# Base URL of the Top-250 list; get_li() appends each page's "next" link
# to it.
DOWNLOAD_URL = 'http://movie.douban.com/top250/'

# File name the workbook is saved under by main().
dest_filename = '电影.xlsx'

# Results are written to the workbook's active sheet.
wb = Workbook()
ws1 = wb.active
ws1.title = "电影top250"
|
||||||
|
@ -24,56 +25,58 @@ def download_page(url):
|
||||||
def get_li(doc):
    """Parse one page of the movie list.

    Args:
        doc: HTML text of one list page.

    Returns:
        A 5-tuple ``(name, star_con, score, info_list, next_url)``. The
        first four are parallel lists with one entry per ``<li>`` of the
        ``grid_view`` list: movie title, the text node containing '评价'
        (the rating count), the score text, and the one-line quote ('无'
        when the entry has none). ``next_url`` is ``DOWNLOAD_URL`` plus the
        "next" link's href, or ``None`` when there is no such link.
    """
    soup = BeautifulSoup(doc, 'html.parser')
    ol = soup.find('ol', class_='grid_view')
    name = []  # movie titles
    star_con = []  # rating-count text
    score = []  # scores
    info_list = []  # one-line quotes
    for item in ol.find_all('li'):
        detail = item.find('div', attrs={'class': 'hd'})
        movie_name = detail.find(
            'span', attrs={'class': 'title'}).get_text()  # movie title
        level_star = item.find(
            'span', attrs={'class': 'rating_num'}).get_text()  # score
        star = item.find('div', attrs={'class': 'star'})
        star_num = star.find(text=re.compile('评价'))  # rating count

        info = item.find('span', attrs={'class': 'inq'})  # one-line quote
        if info:  # not every entry has a quote
            info_list.append(info.get_text())
        else:
            info_list.append('无')
        score.append(level_star)

        name.append(movie_name)
        star_con.append(star_num)

    # The "next" span may be absent altogether; treat that like a missing
    # link instead of failing with AttributeError on None.
    next_span = soup.find('span', attrs={'class': 'next'})
    page = next_span.find('a') if next_span is not None else None
    if page:
        return name, star_con, score, info_list, DOWNLOAD_URL + page['href']
    return name, star_con, score, info_list, None
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Scrape every list page and save the collected rows to the workbook.

    Starting at ``DOWNLOAD_URL``, each page is downloaded and parsed with
    ``get_li`` until it reports no next URL. One spreadsheet row is then
    written per movie (A: title, B: rating-count text, C: score,
    D: one-line quote) and the workbook is saved as ``dest_filename``.
    """
    url = DOWNLOAD_URL
    name = []
    star_con = []
    score = []
    info = []
    while url:
        doc = download_page(url)
        movie, star, level_num, info_list, url = get_li(doc)
        name.extend(movie)
        star_con.extend(star)
        score.extend(level_num)
        info.extend(info_list)
    # Number rows by position. Looking the row up with name.index() sent
    # movies sharing a title to the same row and rescanned the list for
    # every cell.
    rows = zip(name, star_con, score, info)
    for row, (title, votes, rating, quote) in enumerate(rows, start=1):
        ws1['A%s' % row] = title
        ws1['B%s' % row] = votes
        ws1['C%s' % row] = rating
        ws1['D%s' % row] = quote
    wb.save(filename=dest_filename)


if __name__ == '__main__':
    main()
|
||||||
|
|
16
readExcel.py
16
readExcel.py
|
@ -3,15 +3,15 @@ from openpyxl.compat import range
|
||||||
from openpyxl.cell import get_column_letter
|
from openpyxl.cell import get_column_letter
|
||||||
# Build a three-sheet demo workbook and save it as dest_filename.
wb = Workbook()
dest_filename = 'empty_book2.xlsx'

ws1 = wb.active  # first sheet
ws1.title = "range names"  # name the first sheet
# Append 39 rows to the first sheet, each holding the numbers 0..59.
for row in range(1, 40):
    ws1.append(range(60))

ws2 = wb.create_sheet(title="Pi")
ws2['F5'] = 3.14

ws3 = wb.create_sheet(title="Data")
# Fill rows 10-19, columns 27-53 with each column's own letter.
for row in range(10, 20):
    for col in range(27, 54):
        ws3.cell(column=col, row=row, value=get_column_letter(col))

wb.save(filename=dest_filename)
|
||||||
|
|
Loading…
Reference in New Issue