# Python-100-Days/Day66-75/code/example10.py
#
# (Web-viewer metadata: 33 lines, 1.1 KiB, Python; timestamp 2018-06-02 13:19:54 +08:00)
import requests
from bs4 import BeautifulSoup
# selenium是一个自动化测试工具
# 通过它可以模拟浏览器的行为来访问Web页面
from selenium import webdriver
def main():
    """Download every ``<img>`` found on a JavaScript-rendered Taobao page.

    The page is loaded in a real Chrome instance driven by selenium so that
    images inserted dynamically by JavaScript are present in the HTML that
    gets parsed. Each image is then fetched with ``requests`` and written to
    ``c:/images/`` under the last path component of its URL.

    Requirements: ``chromedriver`` must be downloaded and reachable through
    the PATH environment variable. The ``c:/images/`` directory is not
    created here; if it is missing, every save fails and is reported as a
    failed download.

    Failures of individual downloads (network errors, HTTP error statuses,
    file-system errors) are reported on stdout and do not stop the run.
    """
    # Create the Chrome browser instance (needs chromedriver on PATH).
    driver = webdriver.Chrome()
    try:
        # Loading through the browser lets dynamically generated content run.
        driver.get('https://www.taobao.com/markets/mm/mm2017')
        # driver.page_source includes the content created by JavaScript.
        soup = BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # Always shut the browser down, even if loading or parsing failed,
        # so no Chrome/chromedriver process is left behind.
        driver.quit()

    for image in soup.select('img[src]'):
        url = str(image.get('src'))
        # Protocol-relative URLs ("//host/path") need an explicit scheme.
        if not url.startswith('http'):
            url = 'http:' + url
        # Use the last path component as the file name, ignoring any query
        # string or fragment, which are not part of the image's name.
        path_only = url.split('#', 1)[0].split('?', 1)[0]
        filename = path_only[path_only.rfind('/') + 1:]
        print(filename)
        try:
            # A timeout keeps one unresponsive server from hanging the run.
            resp = requests.get(url, timeout=10)
            # Treat 4xx/5xx as failures instead of saving an error page as
            # an image; requests' exceptions derive from OSError.
            resp.raise_for_status()
            with open('c:/images/' + filename, 'wb') as f:
                f.write(resp.content)
        except OSError:
            print(filename + '下载失败!')
    print('图片下载完成!')
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()