Section 4: Scraping Honor of Kings (王者荣耀) Hero Skins in Practice
Course Objectives
Use requests and BeautifulSoup to fetch the official Honor of Kings hero list page and save each hero's image to a local directory.
Course Content
Building browser-like request headers with fake_useragent, fetching and decoding a GBK-encoded page, locating the hero entries with BeautifulSoup, and downloading each image with a polite delay between requests.
Implementation
Crawler
import requests
from fake_useragent import UserAgent
import time
import bs4
import os
url = "https://pvp.qq.com/web201605/herolist.shtml"
us = UserAgent()
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cache-control": "max-age=0",
    "priority": "u=0, i",
    "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Microsoft Edge\";v=\"128\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": us.random,  # fake_useragent supplies a random browser UA string
}
# The page is GBK-encoded, so decode the raw bytes explicitly
res = requests.get(url=url, headers=headers)
html_text = res.content.decode('gbk')
# Locate the <ul class="herolist clearfix"> that holds one <li> per hero
soup = bs4.BeautifulSoup(html_text, 'html.parser')
ul = soup.find("ul", {"class": "herolist clearfix"})
lis = ul.find_all('li')
save_dir = "imgs"
os.makedirs(save_dir, exist_ok=True)
for li in lis:
    # Each <li> wraps <a><img>: src is the hero's list-page portrait, alt is the hero's name
    src = li.a.img.get('src')
    name = li.a.img.get('alt')
    # src is protocol-relative ("//game.gtimg.cn/..."), so prepend the scheme
    src_url = "https:" + src
    img_res = requests.get(src_url, headers=headers)
    base = os.path.join(save_dir, f"{name}.jpg")
    with open(base, "wb") as f:
        f.write(img_res.content)
    print(f"{name} skin downloaded, saved to {base}")
    time.sleep(1)  # throttle requests to avoid hammering the server
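
The loop above assumes every request succeeds: a connection error or timeout aborts the whole run, and a 404 silently writes an error page to disk as a .jpg. A minimal hardening sketch, using only the requests API already in play (download_image is a hypothetical helper, not part of the lesson):

import time

import requests


def download_image(url, path, headers, retries=3, timeout=10):
    """Download url to path, retrying on network errors. Returns True on success."""
    for attempt in range(1, retries + 1):
        try:
            res = requests.get(url, headers=headers, timeout=timeout)
            res.raise_for_status()  # raise on 4xx/5xx instead of saving an error page
            with open(path, "wb") as f:
                f.write(res.content)
            return True
        except requests.RequestException as exc:
            print(f"attempt {attempt}/{retries} failed for {url}: {exc}")
            time.sleep(2 * attempt)  # back off a little before retrying
    return False

Inside the loop, download_image(src_url, base, headers) replaces the bare requests.get plus open pair, so one dead link no longer kills the whole crawl.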
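
Note that the <img> tags on herolist.shtml are the small hero portraits, not the full skin splash art. A common extension, sketched below under loudly assumed specifics: the JSON feed URL (HERO_LIST_JSON), the CDN pattern (SKIN_URL), and the keys ename and cname come from community tutorials, not from this lesson, so verify them against the live site before relying on them.

import json
import os

import requests

headers = {"user-agent": "Mozilla/5.0"}  # minimal UA; reuse the fuller headers above if preferred

HERO_LIST_JSON = "https://pvp.qq.com/web201605/js/herolist.json"  # assumed endpoint
SKIN_URL = ("https://game.gtimg.cn/images/yxzj/img201606/skin/"
            "hero-info/{ename}/{ename}-bigskin-{n}.jpg")  # assumed CDN pattern

res = requests.get(HERO_LIST_JSON, headers=headers, timeout=10)
# The site's pages are GBK-encoded; adjust if the feed turns out to be UTF-8
heroes = json.loads(res.content.decode("gbk"))

os.makedirs("skins", exist_ok=True)
for hero in heroes[:3]:  # sample a few heroes while testing
    ename, cname = hero["ename"], hero["cname"]  # assumed keys: numeric id, Chinese name
    for n in range(1, 4):  # probe the first few skin slots per hero
        skin_res = requests.get(SKIN_URL.format(ename=ename, n=n), headers=headers, timeout=10)
        if skin_res.status_code != 200:
            break  # assume a non-200 response means no further skins for this hero
        with open(os.path.join("skins", f"{cname}-{n}.jpg"), "wb") as f:
            f.write(skin_res.content)

Probing skin slots until the first non-200 response keeps the loop simple, at the cost of missing any gaps in the numbering.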