add python_aiohttp.py
parent
c2de969c7e
commit
bccf92026e
|
@ -27,6 +27,8 @@
|
||||||
### python_metaclass.py: Python进阶: 一步步理解Python中的元类metaclass
|
### python_metaclass.py: Python进阶: 一步步理解Python中的元类metaclass
|
||||||
|
|
||||||
### python_coroutine.py: Python进阶:理解Python中的异步IO和协程(Coroutine), 并应用在爬虫中
|
### python_coroutine.py: Python进阶:理解Python中的异步IO和协程(Coroutine), 并应用在爬虫中
|
||||||
|
|
||||||
|
### python_aiohttp.py: Python中最好用的异步爬虫库Aiohttp代码实例
|
||||||
===================================================================================================
|
===================================================================================================
|
||||||
|
|
||||||
### 您可以fork该项目,并在修改后提交Pull request
|
### 您可以fork该项目,并在修改后提交Pull request
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
# _*_ coding: utf-8 _*_
|
||||||
|
|
||||||
|
"""
|
||||||
|
python_aiohttp.py by xianhu
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
|
||||||
|
# 简单实例
|
||||||
|
async def aiohttp_test01(url):
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.get(url) as resp:
|
||||||
|
print(resp.status)
|
||||||
|
print(await resp.text())
|
||||||
|
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
tasks = [aiohttp_test01("https://api.github.com/events")]
|
||||||
|
loop.run_until_complete(asyncio.wait(tasks))
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
# 其他Http方法
|
||||||
|
# session.post('http://httpbin.org/post', data=b'data')
|
||||||
|
# session.put('http://httpbin.org/put', data=b'data')
|
||||||
|
# session.delete('http://httpbin.org/delete')
|
||||||
|
# session.head('http://httpbin.org/get')
|
||||||
|
# session.options('http://httpbin.org/get')
|
||||||
|
# session.patch('http://httpbin.org/patch', data=b'data')
|
||||||
|
|
||||||
|
# 自定义Headers
|
||||||
|
# payload = {'some': 'data'}
|
||||||
|
# headers = {'content-type': 'application/json'}
|
||||||
|
# await session.post(url, data=json.dumps(payload), headers=headers)
|
||||||
|
|
||||||
|
# 自定义Cookie
|
||||||
|
# cookies = {'cookies_are': 'working'}
|
||||||
|
# async with ClientSession(cookies=cookies) as session:
|
||||||
|
# 访问Cookie: session.cookie_jar
|
||||||
|
|
||||||
|
# 在URLs中传递参数
|
||||||
|
# 1. params = {'key1': 'value1', 'key2': 'value2'}
|
||||||
|
# 2. params = [('key', 'value1'), ('key', 'value2')]
|
||||||
|
# async with session.get('http://httpbin.org/get', params=params) as resp:
|
||||||
|
# assert resp.url == 'http://httpbin.org/get?key2=value2&key1=value1'
|
||||||
|
|
||||||
|
# 发送数据
|
||||||
|
# payload = {'key1': 'value1', 'key2': 'value2'}
|
||||||
|
# async with session.post('http://httpbin.org/post', data=payload) as resp:
|
||||||
|
# async with session.post(url, data=json.dumps(payload)) as resp:
|
||||||
|
# print(await resp.text())
|
||||||
|
|
||||||
|
# 发送文件(1)
|
||||||
|
# files = {'file': open('report.xls', 'rb')}
|
||||||
|
# await session.post(url, data=files)
|
||||||
|
|
||||||
|
# 发送数据(2)
|
||||||
|
# data = FormData()
|
||||||
|
# data.add_field('file',
|
||||||
|
# open('report.xls', 'rb'),
|
||||||
|
# filename='report.xls',
|
||||||
|
# content_type='application/vnd.ms-excel')
|
||||||
|
# await session.post(url, data=data)
|
||||||
|
|
||||||
|
# 超时设置
|
||||||
|
# aync with session.get('https://github.com', timeout=60) as r:
|
||||||
|
|
||||||
|
# 代理支持
|
||||||
|
# async with aiohttp.ClientSession() as session:
|
||||||
|
# async with session.get("http://python.org", proxy="http://some.proxy.com") as resp:
|
||||||
|
# print(resp.status)
|
||||||
|
|
||||||
|
# async with aiohttp.ClientSession() as session:
|
||||||
|
# proxy_auth = aiohttp.BasicAuth('user', 'pass')
|
||||||
|
# async with session.get("http://python.org", proxy="http://some.proxy.com", proxy_auth=proxy_auth) as resp:
|
||||||
|
# print(resp.status)
|
||||||
|
# session.get("http://python.org", proxy="http://user:pass@some.proxy.com")
|
||||||
|
|
||||||
|
# 返回的内容
|
||||||
|
# async with session.get('https://api.github.com/events') as resp:
|
||||||
|
# print(await resp.text())
|
||||||
|
# print(await resp.text(encoding='gbk'))
|
||||||
|
# print(await resp.read())
|
||||||
|
# print(await resp.json())
|
||||||
|
|
||||||
|
# 返回内容较大
|
||||||
|
# with open(filename, 'wb') as fd:
|
||||||
|
# while True:
|
||||||
|
# chunk = await resp.content.read(chunk_size)
|
||||||
|
# if not chunk:
|
||||||
|
# break
|
||||||
|
# fd.write(chunk)
|
||||||
|
|
||||||
|
# 返回的其他变量
|
||||||
|
# async with session.get('http://httpbin.org/get') as resp:
|
||||||
|
# print(resp.status) # 状态码
|
||||||
|
# print(resp.headers) # Headers
|
||||||
|
# print(resp.raw_headers) # 原始Headers
|
||||||
|
# print(resp.cookies) # 返回的Cookie
|
||||||
|
|
||||||
|
# 访问历史History
|
||||||
|
# resp = await session.get('http://example.com/some/redirect/')
|
||||||
|
# resp: <ClientResponse(http://example.com/some/other/url/) [200]>
|
||||||
|
# resp.history: (<ClientResponse(http://example.com/some/redirect/) [301]>,)
|
||||||
|
|
||||||
|
# 释放返回的Response
|
||||||
|
# 1. async with session.get(url) as resp: pass
|
||||||
|
# 2. await resp.release()
|
||||||
|
|
||||||
|
# 连接器: Connectors
|
||||||
|
# conn = aiohttp.TCPConnector()
|
||||||
|
# session = aiohttp.ClientSession(connector=conn)
|
||||||
|
|
||||||
|
# 限制连接池大小:
|
||||||
|
# conn = aiohttp.TCPConnector(limit=30)
|
||||||
|
# conn = aiohttp.TCPConnector(limit=None)
|
Loading…
Reference in New Issue