Merge remote-tracking branch 'xianhu/master'
commit
6ba9d92a7c
10
README.md
10
README.md
|
@ -1,7 +1,7 @@
|
|||
# LearnPython
|
||||
以撸代码的形式学习Python, 具体说明在[知乎专栏-撸代码,学知识](https://zhuanlan.zhihu.com/pythoner)
|
||||
|
||||
===============================================================================
|
||||
===================================================================================================
|
||||
### python_base.py: 千行代码入门Python
|
||||
|
||||
### python_visual.py: 15张图入门Matplotlib
|
||||
|
@ -23,6 +23,12 @@
|
|||
### python_decorator.py: Python进阶: 通过实例详解装饰器(附代码)
|
||||
|
||||
### python_datetime.py: 你真的了解Python中的日期时间处理吗?
|
||||
===============================================================================
|
||||
|
||||
### python_metaclass.py: Python进阶: 一步步理解Python中的元类metaclass
|
||||
|
||||
### python_coroutine.py: Python进阶:理解Python中的异步IO和协程(Coroutine), 并应用在爬虫中
|
||||
|
||||
### python_aiohttp.py: Python中最好用的异步爬虫库Aiohttp代码实例
|
||||
===================================================================================================
|
||||
|
||||
### 您可以fork该项目,并在修改后提交Pull request
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
# _*_ coding: utf-8 _*_
|
||||
|
||||
"""
|
||||
python_aiohttp.py by xianhu
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
|
||||
|
||||
# 简单实例
|
||||
async def aiohttp_test01(url):
    """Fetch ``url`` with a short-lived client session and print the result.

    Prints the HTTP status code first, then the decoded response body.
    """
    # Session and response are both released when the ``async with`` exits.
    async with aiohttp.ClientSession() as client, client.get(url) as response:
        print(response.status)
        body = await response.text()
        print(body)
|
||||
|
||||
# Run the example on an explicitly created event loop instead of relying on
# the implicit loop returned by asyncio.get_event_loop().
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# asyncio.wait() rejects bare coroutine objects since Python 3.11, so the
# coroutine is wrapped in a Task before it is waited on.
tasks = [loop.create_task(aiohttp_test01("https://api.github.com/events"))]
loop.run_until_complete(asyncio.wait(tasks))
loop.close()
|
||||
|
||||
# 其他Http方法
|
||||
# session.post('http://httpbin.org/post', data=b'data')
|
||||
# session.put('http://httpbin.org/put', data=b'data')
|
||||
# session.delete('http://httpbin.org/delete')
|
||||
# session.head('http://httpbin.org/get')
|
||||
# session.options('http://httpbin.org/get')
|
||||
# session.patch('http://httpbin.org/patch', data=b'data')
|
||||
|
||||
# 自定义Headers
|
||||
# payload = {'some': 'data'}
|
||||
# headers = {'content-type': 'application/json'}
|
||||
# await session.post(url, data=json.dumps(payload), headers=headers)
|
||||
|
||||
# 自定义Cookie
|
||||
# cookies = {'cookies_are': 'working'}
|
||||
# async with ClientSession(cookies=cookies) as session:
|
||||
# 访问Cookie: session.cookie_jar
|
||||
|
||||
# 在URLs中传递参数
|
||||
# 1. params = {'key1': 'value1', 'key2': 'value2'}
|
||||
# 2. params = [('key', 'value1'), ('key', 'value2')]
|
||||
# async with session.get('http://httpbin.org/get', params=params) as resp:
|
||||
# assert resp.url == 'http://httpbin.org/get?key2=value2&key1=value1'
|
||||
|
||||
# 发送数据
|
||||
# payload = {'key1': 'value1', 'key2': 'value2'}
|
||||
# async with session.post('http://httpbin.org/post', data=payload) as resp:
|
||||
# async with session.post(url, data=json.dumps(payload)) as resp:
|
||||
# print(await resp.text())
|
||||
|
||||
# 发送文件(1)
|
||||
# files = {'file': open('report.xls', 'rb')}
|
||||
# await session.post(url, data=files)
|
||||
|
||||
# 发送文件(2)
|
||||
# data = FormData()
|
||||
# data.add_field('file',
|
||||
# open('report.xls', 'rb'),
|
||||
# filename='report.xls',
|
||||
# content_type='application/vnd.ms-excel')
|
||||
# await session.post(url, data=data)
|
||||
|
||||
# 超时设置
|
||||
# async with session.get('https://github.com', timeout=60) as r:
|
||||
|
||||
# 代理支持
|
||||
# async with aiohttp.ClientSession() as session:
|
||||
# async with session.get("http://python.org", proxy="http://some.proxy.com") as resp:
|
||||
# print(resp.status)
|
||||
|
||||
# async with aiohttp.ClientSession() as session:
|
||||
# proxy_auth = aiohttp.BasicAuth('user', 'pass')
|
||||
# async with session.get("http://python.org", proxy="http://some.proxy.com", proxy_auth=proxy_auth) as resp:
|
||||
# print(resp.status)
|
||||
# session.get("http://python.org", proxy="http://user:pass@some.proxy.com")
|
||||
|
||||
# 返回的内容
|
||||
# async with session.get('https://api.github.com/events') as resp:
|
||||
# print(await resp.text())
|
||||
# print(await resp.text(encoding='gbk'))
|
||||
# print(await resp.read())
|
||||
# print(await resp.json())
|
||||
|
||||
# 返回内容较大
|
||||
# with open(filename, 'wb') as fd:
|
||||
# while True:
|
||||
# chunk = await resp.content.read(chunk_size)
|
||||
# if not chunk:
|
||||
# break
|
||||
# fd.write(chunk)
|
||||
|
||||
# 返回的其他变量
|
||||
# async with session.get('http://httpbin.org/get') as resp:
|
||||
# print(resp.status) # 状态码
|
||||
# print(resp.headers) # Headers
|
||||
# print(resp.raw_headers) # 原始Headers
|
||||
# print(resp.cookies) # 返回的Cookie
|
||||
|
||||
# 访问历史History
|
||||
# resp = await session.get('http://example.com/some/redirect/')
|
||||
# resp: <ClientResponse(http://example.com/some/other/url/) [200]>
|
||||
# resp.history: (<ClientResponse(http://example.com/some/redirect/) [301]>,)
|
||||
|
||||
# 释放返回的Response
|
||||
# 1. async with session.get(url) as resp: pass
|
||||
# 2. await resp.release()
|
||||
|
||||
# 连接器: Connectors
|
||||
# conn = aiohttp.TCPConnector()
|
||||
# session = aiohttp.ClientSession(connector=conn)
|
||||
|
||||
# 限制连接池大小:
|
||||
# conn = aiohttp.TCPConnector(limit=30)
|
||||
# conn = aiohttp.TCPConnector(limit=None)
|
|
@ -0,0 +1,77 @@
|
|||
# _*_ coding: utf-8 _*_
|
||||
|
||||
"""
|
||||
python_coroutine.py by xianhu
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import threading
|
||||
|
||||
|
||||
# 生产者、消费者例子
|
||||
def consumer():
    """Generator-based consumer for the producer/consumer example.

    Because the body contains ``yield``, calling this function returns a
    generator. The first ``send(None)`` runs it up to the first ``yield`` and
    hands back ``"init ok"``. Each later ``send(n)`` prints the received
    message and yields ``"consume <n> OK"`` as the result for that message.
    """
    print("[Consumer] Init Consumer ......")
    r = "init ok"  # initial result, handed to the producer when it starts us
    while True:
        # ``yield`` returns ``r`` to the producer and receives its next message.
        n = yield r
        print("[Consumer] consume n = %s, r = %s" % (n, r))
        r = "consume %s OK" % n  # result handed back on the next iteration
|
||||
|
||||
|
||||
def produce(c):
    """Drive the consumer generator ``c`` with the messages 1 through 5.

    Primes ``c`` with ``send(None)``, then sends each message in turn and
    prints what the consumer hands back. Finally closes the generator.
    """
    print("[Producer] Init Producer ......")
    first_reply = c.send(None)  # start the generator and take its first result
    print("[Producer] Start Consumer, return %s" % first_reply)
    for message in range(1, 6):
        print("[Producer] While, Producing %s ......" % message)
        # send() switches execution to the consumer until it yields again.
        reply = c.send(message)
        print("[Producer] Consumer return: %s" % reply)
    c.close()  # shut the consumer generator down
    print("[Producer] Close Producer ......")
|
||||
|
||||
# produce(consumer())
|
||||
|
||||
|
||||
# 异步IO例子:适配Python3.4,使用asyncio库
|
||||
# NOTE: asyncio.coroutine is the Python 3.4 way of declaring a coroutine; it
# was deprecated in Python 3.8 and removed in 3.11, so this example only runs
# on older interpreters.
@asyncio.coroutine
def hello(index):
    """Generator-based coroutine that prints, waits one second, and prints again.

    ``index`` only labels the output so interleaved runs can be told apart.
    The current thread is printed on both sides of the wait.
    """
    print('Hello world! index=%s, thread=%s' % (index, threading.current_thread()))
    yield from asyncio.sleep(1)  # simulate an I/O-bound task
    print('Hello again! index=%s, thread=%s' % (index, threading.current_thread()))
|
||||
|
||||
# Create a dedicated event loop for this example and make it the current one.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Wrap the coroutines in Tasks: asyncio.wait() rejects bare coroutine objects
# since Python 3.11.
tasks = [loop.create_task(hello(1)), loop.create_task(hello(2))]
loop.run_until_complete(asyncio.wait(tasks))  # run both tasks to completion
loop.close()  # close the event loop
|
||||
|
||||
|
||||
# 异步IO例子:适配Python3.5,使用async和await关键字
|
||||
async def hello1(index):
    """Native coroutine (Python 3.5+ ``async``/``await``) mirroring ``hello``.

    Prints a greeting, awaits a one-second sleep, then prints again.
    ``index`` labels the output and the current thread is shown each time.
    """
    # threading.currentThread() is a deprecated alias of current_thread().
    print('Hello world! index=%s, thread=%s' % (index, threading.current_thread()))
    await asyncio.sleep(1)  # simulate an I/O-bound task
    print('Hello again! index=%s, thread=%s' % (index, threading.current_thread()))
|
||||
|
||||
# A fresh loop is required here: asyncio.get_event_loop() would hand back the
# loop that an earlier example already closed, and run_until_complete() on a
# closed loop raises RuntimeError.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Wrap the coroutines in Tasks: asyncio.wait() rejects bare coroutine objects
# since Python 3.11.
tasks = [loop.create_task(hello1(1)), loop.create_task(hello1(2))]
loop.run_until_complete(asyncio.wait(tasks))  # run both tasks to completion
loop.close()  # close the event loop
|
||||
|
||||
|
||||
# aiohttp 实例
|
||||
async def get(url):
    """Download ``url`` with aiohttp and print its status and body.

    Both output lines are prefixed with ``url`` so that concurrent requests
    can be told apart.
    """
    # Session and response are both released when the ``async with`` exits.
    async with aiohttp.ClientSession() as client, client.get(url) as response:
        print(url, response.status)
        body = await response.text()
        print(url, body)
|
||||
|
||||
# A fresh loop is required here: asyncio.get_event_loop() would hand back the
# loop that an earlier example already closed, and run_until_complete() on a
# closed loop raises RuntimeError.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
urls = [
    "http://zhushou.360.cn/detail/index/soft_id/3283370",
    "http://zhushou.360.cn/detail/index/soft_id/3264775",
    "http://zhushou.360.cn/detail/index/soft_id/705490",
]
# Wrap the coroutines in Tasks: asyncio.wait() rejects bare coroutine objects
# since Python 3.11.
tasks = [loop.create_task(get(url)) for url in urls]
loop.run_until_complete(asyncio.wait(tasks))  # run all downloads concurrently
loop.close()  # close the event loop
|
|
@ -0,0 +1,55 @@
|
|||
# _*_ coding: utf-8 _*_
|
||||
|
||||
"""
|
||||
python_metaclass.py by xianhu
|
||||
"""
|
||||
|
||||
|
||||
class Foo:
    """Minimal class used to inspect the types of instances, methods and classes."""

    def hello(self):
        """Print a fixed greeting and return ``None``."""
        print("hello world!")
|
||||
|
||||
foo = Foo()
print(type(foo))        # the instance's type: <class '__main__.Foo'>
print(type(foo.hello))  # a bound method:      <class 'method'>
print(type(Foo))        # the class's type:    <class 'type'>

# A class is itself an object, so it can be handled like any other value.
Foo.var = 11  # gain a new attribute at runtime
temp = Foo    # be bound to another name
print(Foo)    # be passed as a function argument
|
||||
|
||||
|
||||
# ========================================================================
|
||||
def init(self, name):
    """Initializer meant to be installed as ``__init__``: store ``name`` on ``self``."""
    self.name = name
|
||||
|
||||
|
||||
def hello(self):
    """Method meant to be installed on a class: print a greeting for ``self.name``."""
    greeting = "hello %s" % self.name
    print(greeting)
|
||||
|
||||
# Build the class dynamically with type(name, bases, namespace) instead of a
# ``class`` statement.
Foo = type("Foo", (object,), {"__init__": init, "hello": hello, "cls_var": 10})
foo = Foo("xianhu")
# hello() prints its greeting itself and returns None, so its result is not
# passed to print() (that would emit an extra "None" line).
foo.hello()
print(Foo.cls_var)

print(foo.__class__)   # the instance's class
print(Foo.__class__)   # the class's class
print(type.__class__)  # the class of ``type`` itself
|
||||
# ========================================================================
|
||||
|
||||
|
||||
class Author(type):
    """Metaclass that adds an ``author`` attribute to every class it creates."""

    def __new__(mcs, name, bases, dict):
        """Create the class after inserting ``author`` into its namespace.

        ``dict`` is the class namespace; the parameter name shadows the
        builtin and is kept only so the signature stays unchanged.
        """
        dict["author"] = "xianhu"  # add the author attribute
        return super().__new__(mcs, name, bases, dict)
|
||||
|
||||
|
||||
class Foo(object, metaclass=Author):
    """Empty class created through the ``Author`` metaclass."""


# The instance resolves ``author`` through its class.
foo = Foo()
print(foo.author)
|
|
@ -4,7 +4,7 @@
|
|||
python_requests.py by xianhu
|
||||
"""
|
||||
|
||||
import requests
|
||||
import requests.adapters
|
||||
|
||||
# 不同方式获取网页内容, 返回一个Response对象, 请求的参数可以为url或Request对象
|
||||
r0 = requests.get("https://github.com/timeline.json")
|
||||
|
@ -181,7 +181,7 @@ requests.get("https://github.com", timeout=(3.05, 27))
|
|||
# 若请求超过了设定的最大重定向次数, 则会抛出一个 TooManyRedirects 异常
|
||||
# 所有Requests显式抛出的异常都继承自 requests.exceptions.RequestException
|
||||
|
||||
# 所有异常
|
||||
# 所有异常:
|
||||
# exception requests.RequestException(*args, **kwargs): There was an ambiguous exception that occurred while handling your request.
|
||||
# exception requests.ConnectionError(*args, **kwargs): A Connection error occurred.
|
||||
# exception requests.HTTPError(*args, **kwargs): An HTTP error occurred.
|
||||
|
@ -223,7 +223,7 @@ proxies = {
|
|||
requests.get("http://example.org", proxies=proxies)
|
||||
# 若代理需要使用HTTP Basic Auth, 可以使用http://user:password@host:port/, 比如"http": "http://user:pass@10.10.1.10:3128/"
|
||||
|
||||
# 除了基本的 HTTP 代理, Request 还支持 SOCKS 协议的代理
|
||||
# 除了基本的 HTTP 代理, Request 还支持 SOCKS 协议的代理, 此时需要单独安装:
|
||||
# $ pip install requests[socks]
|
||||
proxies = {
|
||||
"http": "socks5://user:pass@host:port",
|
||||
|
@ -231,5 +231,17 @@ proxies = {
|
|||
}
|
||||
requests.get("http://example.org", proxies=proxies)
|
||||
|
||||
# Requests 传输适配器
|
||||
# 从 v1.0.0 以后,Requests 的内部采用了模块化设计。部分原因是为了实现传输适配器(Transport Adapter)。
|
||||
# 传输适配器提供了一个机制,让你可以为 HTTP 服务定义交互方法。尤其是它允许你应用服务前的配置。
|
||||
# Requests 自带了一个传输适配器,也就是 HTTPAdapter。 这个适配器使用了强大的 urllib3,为 Requests 提供了默认的 HTTP 和 HTTPS 交互。
|
||||
# 每当 Session 被初始化,就会有适配器附着在 Session 上,其中一个供 HTTP 使用,另一个供 HTTPS 使用。
|
||||
# Request 允许用户创建和使用他们自己的传输适配器,实现他们需要的特殊功能。创建好以后,传输适配器可以被加载到一个会话对象上,附带着一个说明,告诉会话适配器应该应用在哪个 web 服务上。
|
||||
s = requests.Session()
|
||||
s.mount("http://baidu.com", requests.adapters.HTTPAdapter())
|
||||
|
||||
# 出现错误: Connection pool is full, discarding connection: xxxx.com
|
||||
s.mount('https://', requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100))
|
||||
|
||||
# 关闭InsecurePlatformWarning
|
||||
# requests.packages.urllib3.disable_warnings()
|
||||
|
|
Loading…
Reference in New Issue