diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c914474
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/no_use
\ No newline at end of file
diff --git a/CpuToInfluxdb.py b/CpuToInfluxdb.py
new file mode 100644
index 0000000..c732844
--- /dev/null
+++ b/CpuToInfluxdb.py
@@ -0,0 +1,37 @@
+import os
+import time
+
+import psutil
+from influxdb import InfluxDBClient
+
+# Handle to the current process (kept for future per-process stats).
+p1 = psutil.Process(os.getpid())
+
+# One client for the whole run; re-creating it inside the loop wastes connections.
+client = InfluxDBClient('localhost', 8086, 'root', 'root', 'xxyyxx')
+# client.create_database('xxyyxx')  # run once if the database does not exist yet
+
+while True:
+    # Memory usage of this machine, in percent.
+    a = psutil.virtual_memory().percent
+    # Overall CPU usage of this machine, in percent.
+    b = psutil.cpu_percent(interval=1.0)
+
+    json_body = [
+        {
+            "measurement": "cpu_load_short",
+            "tags": {
+                "host": "server01",
+                "region": "us-west"
+            },
+            # "time": "2009-11-10T23:00:00Z",
+            "fields": {
+                "cpu": b,
+                "mem": a
+            }
+        }
+    ]
+    client.write_points(json_body)
+    # result = client.query('select cpu, mem from cpu_load_short;')
+    # print("Result: {0}".format(result))
+    time.sleep(2)
diff --git a/ModifyFilename.py b/ModifyFilename.py
new file mode 100644
index 0000000..c5b9627
--- /dev/null
+++ b/ModifyFilename.py
@@ -0,0 +1,11 @@
+import os
+
+cwd = os.getcwd()
+for entry in os.listdir(cwd):
+    path = os.path.join(cwd, entry)
+    if os.path.isdir(path):
+        # Trim every file name in this subdirectory to its first 50 characters.
+        # Note: two names sharing the same first 50 characters would collide.
+        for fname in os.listdir(path):
+            newname = fname[:50]
+            os.rename(os.path.join(path, fname), os.path.join(path, newname))
diff --git a/countFile.py b/countFile.py
new file mode 100644
index 0000000..de22f10
--- /dev/null
+++ b/countFile.py
@@ -0,0 +1,15 @@
+import os
+
+result = []
+
+def get_all(cwd):
+    for entry in os.listdir(cwd):                 # list the current directory
+        sub_dir = os.path.join(cwd, entry)
+        if os.path.isdir(sub_dir):                # still a directory: recurse
+            get_all(sub_dir)
+        else:
+            result.append(os.path.basename(sub_dir))   # a file: record its name
+
+if __name__ == "__main__":
+    get_all(os.getcwd())                          # start from the current directory
+    print(len(result))                            # total file count, printed once
diff --git a/countPm.py b/countPm.py
new file mode 100644
index 0000000..3c54d69
--- /dev/null
+++ b/countPm.py
@@ -0,0 +1,28 @@
+# -*- coding:utf-8 -*-
+def count_pm(*args):
+    # Convert the raw readings into particle concentrations.
+    alist = [round(i * 2 - 8, 2) for i in args]
+    result = []
+    for pm in alist:
+        result.append(generate_iso_code(abs(pm)))
+    print(result)
+    return result
+
+def generate_iso_code(x):
+    # Thresholds: the interval (pm_value[i], pm_value[i+1]] maps to code iso[i].
+    pm_value = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.3, 2.5, 5, 10, 20, 40, 80]
+    iso = list(range(1, 25))               # ISO 4406 codes, 24 levels in total
+    for i in range(len(pm_value) - 1):     # -1 because the loop reads pm_value[i+1]
+        if pm_value[i] < x <= pm_value[i + 1]:
+            return iso[i]
+    return None                            # concentration outside the table
+
+if __name__ == '__main__':
+    count_pm(7.95, 5.85, 3.98)
+    count_pm(7.918, 5.949, 5.456)
+    count_pm(6.916, 3.956, 3.956)
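+
+# A worked example as a sanity check (values from the first call above):
+# count_pm(7.95, 5.85, 3.98) maps the readings to 7.9, 3.7 and -0.04 via
+# round(i*2-8, 2); their absolute values fall in the (5, 10], (2.5, 5] and
+# (0.02, 0.04] bands, so the expected ISO codes are [10, 9, 2].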
diff --git a/douban_book.py b/douban_book.py
new file mode 100644
index 0000000..5335045
--- /dev/null
+++ b/douban_book.py
@@ -0,0 +1,52 @@
+from bs4 import BeautifulSoup
+import requests
+from openpyxl import Workbook
+
+excel_name = "books.xlsx"
+wb = Workbook()
+ws1 = wb.active
+ws1.title = 'books'
+
+
+def get_html(url):
+    header = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'}
+    html = requests.get(url, headers=header).content
+    return html
+
+
+def get_con(html):
+    soup = BeautifulSoup(html, 'html.parser')
+    book_list = soup.find('div', attrs={'class': 'article'})
+    page = soup.find('div', attrs={'class': 'paginator'})
+    next_page = page.find('span', attrs={'class': 'next'}).find('a')
+    name = []
+    for i in book_list.find_all('table'):
+        book_name = i.find('div', attrs={'class': 'pl2'})
+        m = list(book_name.find('a').stripped_strings)
+        if len(m) > 1:            # the title is split across two strings
+            x = m[0] + m[1]
+        else:
+            x = m[0]
+        name.append(x)
+    if next_page:                 # no next link on the last page
+        return name, next_page.get('href')
+    else:
+        return name, None
+
+
+def main():
+    url = 'https://book.douban.com/top250'
+    name_list = []
+    while url:
+        html = get_html(url)
+        name, url = get_con(html)
+        name_list = name_list + name
+    # enumerate gives row numbers directly; name_list.index() breaks on duplicate titles
+    for row, title in enumerate(name_list, start=1):
+        ws1['A%s' % row] = title
+    wb.save(filename=excel_name)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/douban_movie.py b/douban_movie.py
new file mode 100644
index 0000000..0e4109a
--- /dev/null
+++ b/douban_movie.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# encoding=utf-8
+import re
+
+import requests
+from bs4 import BeautifulSoup
+from openpyxl import Workbook
+
+wb = Workbook()
+dest_filename = 'movies.xlsx'
+ws1 = wb.active
+ws1.title = "movie top250"
+
+DOWNLOAD_URL = 'http://movie.douban.com/top250/'
+
+
+def download_page(url):
+    """Fetch the content of the page at url."""
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36'
+    }
+    data = requests.get(url, headers=headers).content
+    return data
+
+
+def get_li(doc):
+    soup = BeautifulSoup(doc, 'html.parser')
+    ol = soup.find('ol', class_='grid_view')
+    name = []        # titles
+    star_con = []    # numbers of ratings
+    score = []       # scores
+    info_list = []   # one-line quotes
+    for i in ol.find_all('li'):
+        detail = i.find('div', attrs={'class': 'hd'})
+        movie_name = detail.find('span', attrs={'class': 'title'}).get_text()   # title
+        level_star = i.find('span', attrs={'class': 'rating_num'}).get_text()   # score
+        star = i.find('div', attrs={'class': 'star'})
+        star_num = star.find(text=re.compile('评价'))   # matches the "NNN人评价" ratings count
+        info = i.find('span', attrs={'class': 'inq'})   # one-line quote
+        if info:                          # some movies have no quote
+            info_list.append(info.get_text())
+        else:
+            info_list.append('无')         # '无' means "none"
+        score.append(level_star)
+        name.append(movie_name)
+        star_con.append(star_num)
+    page = soup.find('span', attrs={'class': 'next'}).find('a')   # next page link
+    if page:
+        return name, star_con, score, info_list, DOWNLOAD_URL + page['href']
+    return name, star_con, score, info_list, None
+
+
+def main():
+    url = DOWNLOAD_URL
+    name = []
+    star_con = []
+    score = []
+    info = []
+    while url:
+        doc = download_page(url)
+        movie, star, level_num, info_list, url = get_li(doc)
+        name = name + movie
+        star_con = star_con + star
+        score = score + level_num
+        info = info + info_list
+    # enumerate gives the row number directly; name.index(i) breaks on duplicate titles
+    for row, (i, m, o, p) in enumerate(zip(name, star_con, score, info), start=1):
+        ws1['A%s' % row] = i
+        ws1['B%s' % row] = m
+        ws1['C%s' % row] = o
+        ws1['D%s' % row] = p
+    wb.save(filename=dest_filename)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/excelToDatabase.py b/excelToDatabase.py
new file mode 100644
index 0000000..215598f
--- /dev/null
+++ b/excelToDatabase.py
@@ -0,0 +1,34 @@
+from openpyxl import load_workbook
+import pymysql
+
+config = {
+    'host': '127.0.0.1',
+    'port': 3306,
+    'user': 'root',
+    'password': 'root',
+    'charset': 'utf8mb4',
+    # 'cursorclass': pymysql.cursors.DictCursor
+}
+conn = pymysql.connect(**config)
+conn.autocommit(1)
+cursor = conn.cursor()
+name = 'lyexcel'
+cursor.execute('create database if not exists %s' % name)
+conn.select_db(name)
+table_name = 'info'
+cursor.execute('create table if not exists %s(id MEDIUMINT NOT NULL AUTO_INCREMENT,name varchar(30),tel varchar(30),primary key (id))' % table_name)
+
+wb2 = load_workbook('hpu.xlsx')
+for ws in wb2:                  # iterating a workbook yields its worksheets
+    for row in ws:              # iterating a worksheet yields its rows
+        value1 = (row[0].value, row[4].value)    # columns A and E
+        cursor.execute('insert into info (name,tel) values(%s,%s)', value1)
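+
+# A hedged batch alternative (same sheets, same two columns): collecting the
+# rows first and calling cursor.executemany() sends the inserts in one round trip:
+# rows = [(r[0].value, r[4].value) for ws in wb2 for r in ws]
+# cursor.executemany('insert into info (name,tel) values(%s,%s)', rows)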
+
+print("done.")
+cursor.close()
+conn.close()
diff --git a/lagouSpider.py b/lagouSpider.py
new file mode 100644
index 0000000..8432ce6
--- /dev/null
+++ b/lagouSpider.py
@@ -0,0 +1,39 @@
+import requests
+from openpyxl import Workbook
+
+
+def get_json(url, page, lang_name):
+    data = {'first': 'true', 'pn': page, 'kd': lang_name}
+    resp_json = requests.post(url, data).json()   # renamed so it does not shadow the json module
+    list_con = resp_json['content']['positionResult']['result']
+    info_list = []
+    for i in list_con:
+        info = []
+        info.append(i['companyShortName'])
+        info.append(i['companyName'])
+        info.append(i['salary'])
+        info.append(i['city'])
+        info.append(i['education'])
+        info_list.append(info)
+    return info_list
+
+
+def main():
+    lang_name = input('Job title: ')
+    page = 1
+    url = 'http://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
+    info_result = []
+    while page < 31:                  # the first 30 result pages
+        info = get_json(url, page, lang_name)
+        info_result = info_result + info
+        page += 1
+    wb = Workbook()
+    ws1 = wb.active
+    ws1.title = lang_name
+    for row in info_result:
+        ws1.append(row)               # one job per row
+    wb.save('job_info.xlsx')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/login_zhihu.py b/login_zhihu.py
new file mode 100644
index 0000000..589bdb8
--- /dev/null
+++ b/login_zhihu.py
@@ -0,0 +1,39 @@
+import time
+
+import requests
+from bs4 import BeautifulSoup
+
+url = 'https://www.zhihu.com/login/email'
+
+
+def get_captcha(data):
+    # Save the captcha image locally and ask the user to type it in.
+    with open('captcha.gif', 'wb') as fb:
+        fb.write(data)
+    return input('captcha: ')
+
+
+def login(username, password, oncaptcha):
+    sessiona = requests.Session()
+    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'}
+    # The signin page embeds the _xsrf token in a hidden input field.
+    signin_page = sessiona.get('https://www.zhihu.com/#signin', headers=headers).content
+    _xsrf = BeautifulSoup(signin_page, 'html.parser').find('input', attrs={'name': '_xsrf'}).get('value')
+    captcha_content = sessiona.get('https://www.zhihu.com/captcha.gif?r=%d&type=login' % (time.time() * 1000), headers=headers).content
+    data = {
+        "_xsrf": _xsrf,
+        "email": username,
+        "password": password,
+        "remember_me": True,
+        "captcha": oncaptcha(captcha_content)
+    }
+    resp = sessiona.post(url, data, headers=headers).content
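+    # A hedged follow-up check (relies on the Session keeping its cookies):
+    # a signed-in fetch of the home page should no longer redirect to signin.
+    # sessiona.get('https://www.zhihu.com/', headers=headers)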
+    print(resp)
+    return resp
+
+
+if __name__ == "__main__":
+    login('email', 'password', get_captcha)
diff --git a/readExcel.py b/readExcel.py
new file mode 100644
index 0000000..df2f11a
--- /dev/null
+++ b/readExcel.py
@@ -0,0 +1,24 @@
+from openpyxl import Workbook
+from openpyxl.utils import get_column_letter   # lives under openpyxl.utils in current releases
+
+wb = Workbook()
+dest_filename = 'empty_book2.xlsx'
+ws1 = wb.active                       # the first (active) sheet
+ws1.title = "range names"             # name the first sheet
+# Append 39 rows to the first sheet, each holding the numbers 0..59.
+for row in range(1, 40):
+    ws1.append(range(60))
+ws2 = wb.create_sheet(title="Pi")
+ws2['F5'] = 3.14
+ws3 = wb.create_sheet(title="Data")
+# Fill rows 10..19, columns 27..53, with each column's letter (AA..BA).
+for row in range(10, 20):
+    for col in range(27, 54):
+        _ = ws3.cell(column=col, row=row, value="%s" % get_column_letter(col))
+wb.save(filename=dest_filename)
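+
+# A quick read-back sketch, assuming the save above succeeded:
+# from openpyxl import load_workbook
+# wb_check = load_workbook(dest_filename)
+# print(wb_check.sheetnames)            # ['range names', 'Pi', 'Data']
+# print(wb_check['Pi']['F5'].value)     # 3.14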