From bccf92026ebf6fda1552ddf644d0dd4175a77891 Mon Sep 17 00:00:00 2001
From: xianhu
Date: Sat, 3 Dec 2016 21:59:12 +0800
Subject: [PATCH] add python_aiohttp.py

---
 README.md         |   2 +
 python_aiohttp.py | 126 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 128 insertions(+)
 create mode 100644 python_aiohttp.py

diff --git a/README.md b/README.md
index 6067d62..a26fdd8 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,8 @@
 ### python_metaclass.py: Python进阶: 一步步理解Python中的元类metaclass
 
 ### python_coroutine.py: Python进阶:理解Python中的异步IO和协程(Coroutine), 并应用在爬虫中
+
+### python_aiohttp.py: Python中最好用的异步爬虫库Aiohttp代码实例
 ===================================================================================================
 
 ### 您可以fork该项目,并在修改后提交Pull request
diff --git a/python_aiohttp.py b/python_aiohttp.py
new file mode 100644
index 0000000..ae4ca08
--- /dev/null
+++ b/python_aiohttp.py
@@ -0,0 +1,126 @@
+# _*_ coding: utf-8 _*_
+
+"""
+python_aiohttp.py by xianhu
+"""
+
+import asyncio
+import aiohttp
+
+
+# Simple example
+async def aiohttp_test01(url):
+    """
+    Fetch url with a GET request and print the response status and body text.
+
+    :param url: address to request
+    """
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as resp:
+            print(resp.status)
+            print(await resp.text())
+
+# Use a dedicated loop: asyncio.get_event_loop() is deprecated when no loop is running
+loop = asyncio.new_event_loop()
+# asyncio.wait() rejects bare coroutine objects on Python 3.11+, so wrap each one in a Task
+tasks = [loop.create_task(aiohttp_test01("https://api.github.com/events"))]
+try:
+    loop.run_until_complete(asyncio.wait(tasks))
+finally:
+    # Close the loop even if the run was interrupted
+    loop.close()
+
+# Other HTTP methods
+# session.post('http://httpbin.org/post', data=b'data')
+# session.put('http://httpbin.org/put', data=b'data')
+# session.delete('http://httpbin.org/delete')
+# session.head('http://httpbin.org/get')
+# session.options('http://httpbin.org/get')
+# session.patch('http://httpbin.org/patch', data=b'data')
+
+# Custom headers
+# payload = {'some': 'data'}
+# headers = {'content-type': 'application/json'}
+# await session.post(url, data=json.dumps(payload), headers=headers)
+
+# Custom cookies
+# cookies = {'cookies_are': 'working'}
+# async with ClientSession(cookies=cookies) as session:
+# Accessing cookies: session.cookie_jar
+
+# Passing parameters in URLs
+# 1. params = {'key1': 'value1', 'key2': 'value2'}
+# 2. params = [('key', 'value1'), ('key', 'value2')]
+# async with session.get('http://httpbin.org/get', params=params) as resp:
+#     assert resp.url == 'http://httpbin.org/get?key2=value2&key1=value1'
+
+# Sending data
+# payload = {'key1': 'value1', 'key2': 'value2'}
+# async with session.post('http://httpbin.org/post', data=payload) as resp:
+# async with session.post(url, data=json.dumps(payload)) as resp:
+#     print(await resp.text())
+
+# Sending files (1)
+# files = {'file': open('report.xls', 'rb')}
+# await session.post(url, data=files)
+
+# Sending files (2)
+# data = FormData()
+# data.add_field('file',
+#                open('report.xls', 'rb'),
+#                filename='report.xls',
+#                content_type='application/vnd.ms-excel')
+# await session.post(url, data=data)
+
+# Timeout settings
+# async with session.get('https://github.com', timeout=60) as r:
+
+# Proxy support
+# async with aiohttp.ClientSession() as session:
+#     async with session.get("http://python.org", proxy="http://some.proxy.com") as resp:
+#         print(resp.status)
+
+# async with aiohttp.ClientSession() as session:
+#     proxy_auth = aiohttp.BasicAuth('user', 'pass')
+#     async with session.get("http://python.org", proxy="http://some.proxy.com", proxy_auth=proxy_auth) as resp:
+#         print(resp.status)
+# session.get("http://python.org", proxy="http://user:pass@some.proxy.com")
+
+# Response content
+# async with session.get('https://api.github.com/events') as resp:
+#     print(await resp.text())
+#     print(await resp.text(encoding='gbk'))
+#     print(await resp.read())
+#     print(await resp.json())
+
+# Large response bodies
+# with open(filename, 'wb') as fd:
+#     while True:
+#         chunk = await resp.content.read(chunk_size)
+#         if not chunk:
+#             break
+#         fd.write(chunk)
+
+# Other response attributes
+# async with session.get('http://httpbin.org/get') as resp:
+#     print(resp.status)       # status code
+#     print(resp.headers)      # headers
+#     print(resp.raw_headers)  # raw headers
+#     print(resp.cookies)      # cookies returned by the server
+
+# Redirect history
+# resp = await session.get('http://example.com/some/redirect/')
+# resp: <ClientResponse(http://example.com/some/other/url/) [200]>
+# resp.history: (<ClientResponse(http://example.com/some/redirect/) [301]>,)
+
+# Releasing the response
+# 1. async with session.get(url) as resp: pass
+# 2. await resp.release()
+
+# Connectors
+# conn = aiohttp.TCPConnector()
+# session = aiohttp.ClientSession(connector=conn)
+
+# Limiting the connection pool size:
+# conn = aiohttp.TCPConnector(limit=30)
+# conn = aiohttp.TCPConnector(limit=None)