add python_aiohttp.py

master
xianhu 2016-12-03 21:59:12 +08:00
parent c2de969c7e
commit bccf92026e
2 changed files with 118 additions and 0 deletions

View File

@ -27,6 +27,8 @@
### python_metaclass.py: Python进阶: 一步步理解Python中的元类metaclass
### python_coroutine.py: Python进阶: 理解Python中的异步IO和协程(Coroutine), 并应用在爬虫中
### python_aiohttp.py: Python中最好用的异步爬虫库Aiohttp代码实例
===================================================================================================
### 您可以fork该项目,并在修改后提交Pull request

116
python_aiohttp.py 100644
View File

@ -0,0 +1,116 @@
# _*_ coding: utf-8 _*_
"""
python_aiohttp.py by xianhu
"""
import asyncio
import aiohttp
# Minimal example
async def aiohttp_test01(url):
    """
    Send a GET request to url, then print the response status code followed by the response body text.
    """
    async with aiohttp.ClientSession() as client, client.get(url) as response:
        print(response.status)
        body = await response.text()
        print(body)
# Run the demo coroutine to completion on a dedicated event loop.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    # asyncio.wait() no longer accepts bare coroutine objects (TypeError on Python 3.11+),
    # so the coroutine is wrapped in a Task bound to this loop first.
    tasks = [loop.create_task(aiohttp_test01("https://api.github.com/events"))]
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    # Close the loop even when the run raises or is interrupted.
    loop.close()
# 其他Http方法
# session.post('http://httpbin.org/post', data=b'data')
# session.put('http://httpbin.org/put', data=b'data')
# session.delete('http://httpbin.org/delete')
# session.head('http://httpbin.org/get')
# session.options('http://httpbin.org/get')
# session.patch('http://httpbin.org/patch', data=b'data')
# 自定义Headers
# payload = {'some': 'data'}
# headers = {'content-type': 'application/json'}
# await session.post(url, data=json.dumps(payload), headers=headers)
# 自定义Cookie
# cookies = {'cookies_are': 'working'}
# async with ClientSession(cookies=cookies) as session:
# 访问Cookie: session.cookie_jar
# 在URLs中传递参数
# 1. params = {'key1': 'value1', 'key2': 'value2'}
# 2. params = [('key', 'value1'), ('key', 'value2')]
# async with session.get('http://httpbin.org/get', params=params) as resp:
# assert resp.url == 'http://httpbin.org/get?key2=value2&key1=value1'
# 发送数据
# payload = {'key1': 'value1', 'key2': 'value2'}
# async with session.post('http://httpbin.org/post', data=payload) as resp:
# async with session.post(url, data=json.dumps(payload)) as resp:
# print(await resp.text())
# 发送文件(1)
# files = {'file': open('report.xls', 'rb')}
# await session.post(url, data=files)
# 发送文件(2)
# data = FormData()
# data.add_field('file',
# open('report.xls', 'rb'),
# filename='report.xls',
# content_type='application/vnd.ms-excel')
# await session.post(url, data=data)
# 超时设置
# async with session.get('https://github.com', timeout=60) as r:
# 代理支持
# async with aiohttp.ClientSession() as session:
# async with session.get("http://python.org", proxy="http://some.proxy.com") as resp:
# print(resp.status)
# async with aiohttp.ClientSession() as session:
# proxy_auth = aiohttp.BasicAuth('user', 'pass')
# async with session.get("http://python.org", proxy="http://some.proxy.com", proxy_auth=proxy_auth) as resp:
# print(resp.status)
# session.get("http://python.org", proxy="http://user:pass@some.proxy.com")
# 返回的内容
# async with session.get('https://api.github.com/events') as resp:
# print(await resp.text())
# print(await resp.text(encoding='gbk'))
# print(await resp.read())
# print(await resp.json())
# 返回内容较大
# with open(filename, 'wb') as fd:
# while True:
# chunk = await resp.content.read(chunk_size)
# if not chunk:
# break
# fd.write(chunk)
# 返回的其他变量
# async with session.get('http://httpbin.org/get') as resp:
# print(resp.status) # 状态码
# print(resp.headers) # Headers
# print(resp.raw_headers) # 原始Headers
# print(resp.cookies) # 返回的Cookie
# 访问历史History
# resp = await session.get('http://example.com/some/redirect/')
# resp: <ClientResponse(http://example.com/some/other/url/) [200]>
# resp.history: (<ClientResponse(http://example.com/some/redirect/) [301]>,)
# 释放返回的Response
# 1. async with session.get(url) as resp: pass
# 2. await resp.release()
# 连接器: Connectors
# conn = aiohttp.TCPConnector()
# session = aiohttp.ClientSession(connector=conn)
# 限制连接池大小:
# conn = aiohttp.TCPConnector(limit=30)
# conn = aiohttp.TCPConnector(limit=None)