Merge remote-tracking branch 'xianhu/master'

master
Andy Ron 2016-12-05 11:47:13 +08:00
commit 6ba9d92a7c
5 changed files with 271 additions and 5 deletions

View File

@ -1,7 +1,7 @@
# LearnPython
以撸代码的形式学习Python, 具体说明在[知乎专栏-撸代码,学知识](https://zhuanlan.zhihu.com/pythoner)
===============================================================================
===================================================================================================
### python_base.py: 千行代码入门Python
### python_visual.py: 15张图入门Matplotlib
@ -23,6 +23,12 @@
### python_decorator.py: Python进阶: 通过实例详解装饰器(附代码)
### python_datetime.py: 你真的了解Python中的日期时间处理吗
===============================================================================
### python_metaclass.py: Python进阶: 一步步理解Python中的元类metaclass
### python_coroutine.py: Python进阶: 理解Python中的异步IO和协程(Coroutine), 并应用在爬虫中
### python_aiohttp.py: Python中最好用的异步爬虫库Aiohttp代码实例
===================================================================================================
### 您可以fork该项目,并在修改后提交Pull request

116
python_aiohttp.py 100644
View File

@ -0,0 +1,116 @@
# _*_ coding: utf-8 _*_
"""
python_aiohttp.py by xianhu
"""
import asyncio
import aiohttp
# 简单实例
async def aiohttp_test01(url):
    """Send a GET request to ``url`` and print the status code, then the body text.

    A new ``aiohttp.ClientSession`` is opened for this single request and is
    closed again when the ``async with`` block exits.

    :param url: address to request
    """
    async with aiohttp.ClientSession() as client:
        async with client.get(url) as response:
            status_code = response.status
            print(status_code)
            body = await response.text()
            print(body)
# Run the simple example on a dedicated event loop.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# asyncio.wait() no longer accepts bare coroutine objects on Python 3.11+,
# so the coroutine is wrapped in a Task before it is handed over.
tasks = [loop.create_task(aiohttp_test01("https://api.github.com/events"))]
try:
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    loop.close()  # release the loop even if the run is interrupted
# 其他Http方法
# session.post('http://httpbin.org/post', data=b'data')
# session.put('http://httpbin.org/put', data=b'data')
# session.delete('http://httpbin.org/delete')
# session.head('http://httpbin.org/get')
# session.options('http://httpbin.org/get')
# session.patch('http://httpbin.org/patch', data=b'data')
# 自定义Headers
# payload = {'some': 'data'}
# headers = {'content-type': 'application/json'}
# await session.post(url, data=json.dumps(payload), headers=headers)
# 自定义Cookie
# cookies = {'cookies_are': 'working'}
# async with ClientSession(cookies=cookies) as session:
# 访问Cookie: session.cookie_jar
# 在URLs中传递参数
# 1. params = {'key1': 'value1', 'key2': 'value2'}
# 2. params = [('key', 'value1'), ('key', 'value2')]
# async with session.get('http://httpbin.org/get', params=params) as resp:
# assert resp.url == 'http://httpbin.org/get?key2=value2&key1=value1'
# 发送数据
# payload = {'key1': 'value1', 'key2': 'value2'}
# async with session.post('http://httpbin.org/post', data=payload) as resp:
# async with session.post(url, data=json.dumps(payload)) as resp:
# print(await resp.text())
# 发送文件(1)
# files = {'file': open('report.xls', 'rb')}
# await session.post(url, data=files)
# 发送文件(2)
# data = FormData()
# data.add_field('file',
# open('report.xls', 'rb'),
# filename='report.xls',
# content_type='application/vnd.ms-excel')
# await session.post(url, data=data)
# 超时设置
# async with session.get('https://github.com', timeout=60) as r:
# 代理支持
# async with aiohttp.ClientSession() as session:
# async with session.get("http://python.org", proxy="http://some.proxy.com") as resp:
# print(resp.status)
# async with aiohttp.ClientSession() as session:
# proxy_auth = aiohttp.BasicAuth('user', 'pass')
# async with session.get("http://python.org", proxy="http://some.proxy.com", proxy_auth=proxy_auth) as resp:
# print(resp.status)
# session.get("http://python.org", proxy="http://user:pass@some.proxy.com")
# 返回的内容
# async with session.get('https://api.github.com/events') as resp:
# print(await resp.text())
# print(await resp.text(encoding='gbk'))
# print(await resp.read())
# print(await resp.json())
# 返回内容较大
# with open(filename, 'wb') as fd:
# while True:
# chunk = await resp.content.read(chunk_size)
# if not chunk:
# break
# fd.write(chunk)
# 返回的其他变量
# async with session.get('http://httpbin.org/get') as resp:
# print(resp.status) # 状态码
# print(resp.headers) # Headers
# print(resp.raw_headers) # 原始Headers
# print(resp.cookies) # 返回的Cookie
# 访问历史History
# resp = await session.get('http://example.com/some/redirect/')
# resp: <ClientResponse(http://example.com/some/other/url/) [200]>
# resp.history: (<ClientResponse(http://example.com/some/redirect/) [301]>,)
# 释放返回的Response
# 1. async with session.get(url) as resp: pass
# 2. await resp.release()
# 连接器: Connectors
# conn = aiohttp.TCPConnector()
# session = aiohttp.ClientSession(connector=conn)
# 限制连接池大小:
# conn = aiohttp.TCPConnector(limit=30)
# conn = aiohttp.TCPConnector(limit=None)

View File

@ -0,0 +1,77 @@
# _*_ coding: utf-8 _*_
"""
python_coroutine.py by xianhu
"""
import asyncio
import aiohttp
import threading
# 生产者、消费者例子
def consumer():
    """Generator-based consumer that is driven through ``send()``.

    Priming the generator with ``send(None)`` yields "init ok". Every later
    ``send(n)`` prints the received value together with the previous reply
    and yields "consume <n> OK". The loop never ends on its own; the caller
    closes the generator.
    """
    print("[Consumer] Init Consumer ......")
    reply = "init ok"  # initial result, handed to the producer when the consumer is started
    while True:
        # yield hands the current reply to the producer and receives its next message
        message = yield reply
        print("[Consumer] conusme n = %s, r = %s" % (message, reply))
        reply = "consume %s OK" % message  # result returned to the producer on the next iteration
def produce(c):
    """Drive the consumer generator ``c``: start it, send it 1 through 5, then close it.

    :param c: a generator object that accepts values through ``send()``
    """
    print("[Producer] Init Producer ......")
    reply = c.send(None)  # start the consumer generator and receive its first result
    print("[Producer] Start Consumer, return %s" % reply)
    for item in range(1, 6):
        print("[Producer] While, Producing %s ......" % item)
        # send() passes the message in and switches execution to the consumer
        reply = c.send(item)
        print("[Producer] Consumer return: %s" % reply)
    c.close()  # shut the consumer generator down
    print("[Producer] Close Producer ......")
# produce(consumer())
# 异步IO例子: 适配Python3.4, 使用asyncio库
@asyncio.coroutine
def hello(index):
    """Generator-based coroutine (Python 3.4 style): greet, wait one second, greet again.

    NOTE: ``asyncio.coroutine`` was deprecated in Python 3.8 and removed in
    Python 3.11, so this example only runs on older interpreters.

    :param index: label printed with each message to tell the tasks apart
    """
    print('Hello world! index=%s, thread=%s' % (index, threading.current_thread()))
    yield from asyncio.sleep(1)  # simulate an IO task
    print('Hello again! index=%s, thread=%s' % (index, threading.current_thread()))
# Use a fresh event loop for this example: once a loop has been closed it cannot run again.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Wrap the coroutines in Tasks; asyncio.wait() rejects bare coroutine objects on Python 3.11+.
tasks = [loop.create_task(hello(1)), loop.create_task(hello(2))]
try:
    loop.run_until_complete(asyncio.wait(tasks))  # run both tasks to completion
finally:
    loop.close()  # close the event loop
# 异步IO例子: 适配Python3.5, 使用async和await关键字
async def hello1(index):
    """Native coroutine (Python 3.5 ``async``/``await``): greet, wait one second, greet again.

    :param index: label printed with each message to tell the tasks apart
    """
    # threading.current_thread() replaces the deprecated camelCase alias currentThread()
    print('Hello world! index=%s, thread=%s' % (index, threading.current_thread()))
    await asyncio.sleep(1)  # simulate an IO task
    print('Hello again! index=%s, thread=%s' % (index, threading.current_thread()))
# Use a fresh event loop for this example: once a loop has been closed it cannot run again.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Wrap the coroutines in Tasks; asyncio.wait() rejects bare coroutine objects on Python 3.11+.
tasks = [loop.create_task(hello1(1)), loop.create_task(hello1(2))]
try:
    loop.run_until_complete(asyncio.wait(tasks))  # run both tasks to completion
finally:
    loop.close()  # close the event loop
# aiohttp 实例
async def get(url):
    """Request ``url`` and print the URL next to the status code, then next to the body text.

    :param url: address to request
    """
    async with aiohttp.ClientSession() as client:
        async with client.get(url) as response:
            print(url, response.status)
            page = await response.text()
            print(url, page)
# Use a fresh event loop for this example: once a loop has been closed it cannot run again.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Wrap the coroutines in Tasks; asyncio.wait() rejects bare coroutine objects on Python 3.11+.
tasks = [
    loop.create_task(get("http://zhushou.360.cn/detail/index/soft_id/3283370")),
    loop.create_task(get("http://zhushou.360.cn/detail/index/soft_id/3264775")),
    loop.create_task(get("http://zhushou.360.cn/detail/index/soft_id/705490")),
]
try:
    loop.run_until_complete(asyncio.wait(tasks))  # run all requests to completion
finally:
    loop.close()  # close the event loop

View File

@ -0,0 +1,55 @@
# _*_ coding: utf-8 _*_
"""
python_metaclass.py by xianhu
"""
class Foo:
    """Minimal example class with a single greeting method."""

    def hello(self):
        """Print a fixed greeting; returns None."""
        print("hello world!")
foo = Foo()
# Printed one per line:
#   <class '__main__.Foo'>  - type of the instance
#   <class 'method'>        - type of the bound method
#   <class 'type'>          - type of the class itself
print(type(foo), type(foo.hello), type(Foo), sep="\n")
temp = Foo  # a class can be assigned to another variable
setattr(Foo, "var", 11)  # an attribute can be added to it
print(Foo)  # and it can be passed as a function argument
# ========================================================================
def init(self, name):
    """Store ``name`` on the instance as ``self.name``; returns None."""
    self.name = name
def hello(self):
    """Print a greeting that includes ``self.name``; returns None."""
    greeting = "hello %s" % self.name
    print(greeting)
# Build the class dynamically with type(name, bases, namespace).
Foo = type("Foo", (object,), dict(__init__=init, hello=hello, cls_var=10))
foo = Foo("xianhu")
print(foo.hello())  # hello() prints its greeting and returns None, so "None" is printed as well
print(Foo.cls_var)
# Class of the instance, class of the class, and class of type itself, one per line.
print(foo.__class__, Foo.__class__, type.__class__, sep="\n")
# ========================================================================
class Author(type):
    """Metaclass that adds an ``author`` attribute to every class it creates."""

    def __new__(mcs, name, bases, dict):
        # Add the author attribute to the class namespace before the class object is built.
        # NOTE: the parameter name ``dict`` shadows the builtin inside this method;
        # it is kept unchanged so the signature stays the same.
        dict["author"] = "xianhu"
        return super().__new__(mcs, name, bases, dict)
class Foo(metaclass=Author):
    """Class created through the Author metaclass."""


foo = Foo()
print(foo.author)  # the attribute added by the metaclass is reachable from instances

View File

@ -4,7 +4,7 @@
python_requests.py by xianhu
"""
import requests
import requests.adapters
# 不同方式获取网页内容, 返回一个Response对象, 请求的参数可以为url或Request对象
r0 = requests.get("https://github.com/timeline.json")
@ -181,7 +181,7 @@ requests.get("https://github.com", timeout=(3.05, 27))
# 若请求超过了设定的最大重定向次数, 则会抛出一个 TooManyRedirects 异常
# 所有Requests显式抛出的异常都继承自 requests.exceptions.RequestException
# 所有异常
# 所有异常:
# exception requests.RequestException(*args, **kwargs): There was an ambiguous exception that occurred while handling your request.
# exception requests.ConnectionError(*args, **kwargs): A Connection error occurred.
# exception requests.HTTPError(*args, **kwargs): An HTTP error occurred.
@ -223,7 +223,7 @@ proxies = {
requests.get("http://example.org", proxies=proxies)
# 若代理需要使用HTTP Basic Auth, 可以使用http://user:password@host:port/, 比如"http": "http://user:pass@10.10.1.10:3128/"
# 除了基本的 HTTP 代理, Request 还支持 SOCKS 协议的代理
# 除了基本的 HTTP 代理, Requests 还支持 SOCKS 协议的代理, 此时需要单独安装:
# $ pip install requests[socks]
proxies = {
"http": "socks5://user:pass@host:port",
@ -231,5 +231,17 @@ proxies = {
}
requests.get("http://example.org", proxies=proxies)
# Requests 传输适配器
# 从 v1.0.0 以后, Requests 的内部采用了模块化设计。部分原因是为了实现传输适配器(Transport Adapter)。
# 传输适配器提供了一个机制,让你可以为 HTTP 服务定义交互方法。尤其是它允许你应用服务前的配置。
# Requests 自带了一个传输适配器, 也就是 HTTPAdapter。这个适配器使用了强大的 urllib3, 为 Requests 提供了默认的 HTTP 和 HTTPS 交互。
# 每当 Session 被初始化,就会有适配器附着在 Session 上,其中一个供 HTTP 使用,另一个供 HTTPS 使用。
# Requests 允许用户创建和使用他们自己的传输适配器，实现他们需要的特殊功能。创建好以后，传输适配器可以被加载到一个会话对象上，附带着一个说明，告诉会话适配器应该应用在哪个 web 服务上。
# Create a session and mount a default HTTPAdapter for the "http://baidu.com" URL prefix.
s = requests.Session()
s.mount("http://baidu.com", requests.adapters.HTTPAdapter())
# If this error appears: Connection pool is full, discarding connection: xxxx.com
# NOTE(review): presumably the larger pool sizes mounted below are the intended remedy - confirm
s.mount('https://', requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100))
# 关闭InsecurePlatformWarning
# requests.packages.urllib3.disable_warnings()