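# 1. Fetch the main listing page's HTML and extract each sub-page address (href).
# 2. Fetch each sub-page and find the image download URL in it.
# 3. Download the image.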
import os
import time
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# Crawl one wallpaper listing page, follow every album link found on it, and
# save the full-size image of each album page into the local "img" directory.
domain = "https://www.umei.cc"
url = "https://www.umei.cc/bizhitupian/weimeibizhi/"

REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever
IMG_DIR = "img"


def _fetch_page(page_url):
    """Download *page_url* and return it parsed by the built-in HTML parser.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-2xx HTTP status.
    """
    resp = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = "utf-8"  # force UTF-8 so the page text is not garbled
    return BeautifulSoup(resp.text, "html.parser")


def _find_image_url(album_url):
    """Return the absolute image URL on an album page, or None if absent."""
    son_page = _fetch_page(album_url)
    holder = son_page.find("div", class_="big-pic")
    img = holder.find("img") if holder is not None else None
    src = img.get("src") if img is not None else None
    # urljoin leaves an absolute src untouched and resolves a relative one.
    return urljoin(album_url, src) if src else None


def _save_image(image_url):
    """Download *image_url* into IMG_DIR and return the file name used.

    Returns None when the URL path has no final component to use as a name.

    Raises:
        requests.RequestException: if the download fails.
    """
    # Name the file after the last path segment only, ignoring any query string.
    img_name = os.path.basename(urlsplit(image_url).path)
    if not img_name:
        return None
    resp = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    with open(os.path.join(IMG_DIR, img_name), mode="wb") as f:
        f.write(resp.content)  # raw image bytes
    return img_name


# open() does not create missing directories, so make sure the target exists.
os.makedirs(IMG_DIR, exist_ok=True)

page = _fetch_page(url)
listing = page.find("div", class_="listlbc_cont_l")
if listing is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError("album list container 'listlbc_cont_l' not found at " + url)

for link in listing.find_all("a", class_="img_album_btn"):
    href = link.get("href")
    if not href:
        continue
    son2 = urljoin(domain, href)  # works for both relative and absolute hrefs
    try:
        down2 = _find_image_url(son2)
        img_name = _save_image(down2) if down2 else None
    except requests.RequestException as exc:
        # One broken album should not abort the whole crawl.
        print("skip", son2, exc)
    else:
        if img_name:
            print("over", img_name)
        else:
            print("skip", son2, "no image found")
    time.sleep(1)  # pause between albums to avoid hammering the site

print("all is done")
# Comments (0)