33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
|
import requests
|
||
|
from bs4 import BeautifulSoup
|
||
|
# Selenium is a browser-automation (testing) tool.
# It can drive a real browser to load web pages the way a user would.
|
||
|
from selenium import webdriver
|
||
|
|
||
|
|
||
|
def main(page_url='https://www.taobao.com/markets/mm/mm2017',
         save_dir='c:/images/'):
    """Download every ``<img>`` on a JavaScript-rendered page to a directory.

    The page is loaded in a Chrome instance driven by Selenium so that
    images inserted by JavaScript are present in the parsed HTML.
    chromedriver must be installed and reachable through PATH.

    Args:
        page_url: Address of the page to scan for images.
        save_dir: Directory that receives the downloaded files; it is
            created if it does not exist yet.

    Raises:
        OSError: If ``save_dir`` cannot be created. Failures of individual
            downloads are reported on stdout and do not abort the run.
    """
    # Function-scope imports keep this block self-contained (stdlib only).
    import os
    from urllib.parse import urljoin, urlparse

    os.makedirs(save_dir, exist_ok=True)

    driver = webdriver.Chrome()
    try:
        driver.get(page_url)
        # page_source includes the content created dynamically by JavaScript.
        soup = BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()

    for image in soup.select('img[src]'):
        src = image.get('src')
        if not src:
            continue
        # urljoin resolves protocol-relative ("//host/a.png") and relative
        # ("img/a.png") references against the page address.
        url = urljoin(page_url, src)
        parts = urlparse(url)
        if parts.scheme not in ('http', 'https'):
            # e.g. inline "data:" URIs - nothing to download.
            continue
        # Take the name from the URL path only, so a "?query" suffix does
        # not end up in the file name.
        filename = parts.path.rsplit('/', 1)[-1]
        if not filename:
            continue
        print(filename)
        try:
            resp = requests.get(url, timeout=10)
            # Do not store an HTTP error page as if it were an image.
            resp.raise_for_status()
            with open(os.path.join(save_dir, filename), 'wb') as f:
                f.write(resp.content)
        except OSError:
            # requests' exceptions derive from IOError/OSError, so this
            # covers both network and file-system failures.
            print(filename + '下载失败!')
    print('图片下载完成!')
|
||
|
|
||
|
|
||
|
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|