让Gemini写了个爬虫,试了下可以下载,用图片描述来命名下载的图片。
将下面的代码保存到 download.py
在命令行中输入 python3 download.py
按回车下载,会自动创建文件夹。
(需要先安装 Python 3,然后在命令行中输入 pip3 install requests lxml 安装依赖库)
import requests
from lxml import html
import os
import time
import re
# Characters stripped from descriptions before they are used as file names.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _descriptive_filename(description_nodes, thumbnail_link, fallback_stem):
    """Return the ``.jpg`` file name to use for one search-result entry.

    Preference order: the sanitized description text (at most 100
    characters), then the fourth ``/``-separated piece of the thumbnail URL
    (what the original script called the "ark id"), then *fallback_stem*.
    """
    if description_nodes:
        cleaned = _ILLEGAL_FILENAME_CHARS.sub('', description_nodes[0].strip())
        cleaned = cleaned[:100].strip()
        # A description made only of illegal characters would otherwise
        # produce the bare name ".jpg".
        if cleaned:
            return cleaned + ".jpg"

    segments = thumbnail_link.split('/')
    # Guard the index: a shorter-than-expected URL used to raise IndexError.
    if len(segments) > 3 and segments[3]:
        return segments[3] + ".jpg"
    return fallback_stem + ".jpg"


def download_images_with_descriptive_names(
    base_url="?f%5Bcommon_model_name_ssi%5D%5B%5D=Item",
    download_folder="gamble_descriptive_images",
    total_pages=250,
):
    """Download full-size images from a paginated search listing.

    For every result page ``1..total_pages`` the function fetches
    ``{base_url}&page={n}``, finds each ``div`` whose class contains
    ``blacklight-image``, takes the thumbnail ``src`` inside it, swaps the
    ``!350,350`` size token for ``full`` and saves the result in
    *download_folder*, named after the entry's description text.

    Args:
        base_url: Absolute ``http``/``https`` search URL that already contains
            a query string (``&page=N`` is appended to it).
            NOTE(review): the default literal has no scheme or host -- it
            looks like the site address was stripped from this file. Restore
            the real address or pass ``base_url`` explicitly.
        download_folder: Directory to save into; created if missing.
        total_pages: Number of result pages to walk, starting at page 1.

    Raises:
        ValueError: If *base_url* is not an absolute http(s) URL. This is
            checked before any directory is created or request is sent.

    Network and file errors for a single page or image are printed and
    skipped so that one failure does not abort the whole run.

    NOTE(review): two entries with the same description map to the same file
    name, so the later one is reported as already existing and not fetched.
    """
    # Imported locally so this function does not rely on changes to the
    # file's top-level import list.
    from urllib.parse import urljoin, urlsplit

    url_parts = urlsplit(base_url)
    if url_parts.scheme not in ("http", "https") or not url_parts.netloc:
        # Fail fast instead of issuing total_pages requests that cannot work.
        raise ValueError(
            f"base_url must be an absolute http(s) URL, got {base_url!r}"
        )

    os.makedirs(download_folder, exist_ok=True)

    for page_num in range(1, total_pages + 1):
        page_url = f"{base_url}&page={page_num}"
        print(f"\n--- 正在处理第 {page_num} 页, URL: {page_url} ---")

        try:
            response = requests.get(page_url, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"获取第 {page_num} 页失败: {e}")
            continue

        # Pass the raw bytes so lxml can pick up the page's declared encoding.
        tree = html.fromstring(response.content)
        image_blocks = tree.xpath('//div[contains(@class, "blacklight-image")]')
        if not image_blocks:
            print(f"第 {page_num} 页未找到任何图片信息块。")
            continue

        for i, block in enumerate(image_blocks, 1):
            thumbnail_srcs = block.xpath(
                './/img[contains(@class, "img-thumbnail")]/@src'
            )
            if not thumbnail_srcs:
                continue
            thumbnail_link = thumbnail_srcs[0]

            # Only thumbnails carrying the expected size token can be
            # rewritten into a full-size URL; anything else is skipped.
            if '!350,350' not in thumbnail_link:
                continue
            high_res_link = thumbnail_link.replace('!350,350', 'full')

            filename = _descriptive_filename(
                block.xpath('.//a[@itemprop="name"]/text()'),
                thumbnail_link,
                f"page{page_num}_image{i}",
            )

            # The original URL-building f-string was unreadable in this file;
            # urljoin resolves a site-relative src against the page URL and
            # leaves an already-absolute src unchanged.
            full_download_url = urljoin(page_url, high_res_link)
            file_path = os.path.join(download_folder, filename)
            print(f" > 正在处理第 {i} 个图片: {filename}")

            if os.path.exists(file_path):
                print(f" > 文件已存在,跳过: {filename}")
                continue

            # Stream into a temporary file and rename on success, so that an
            # interrupted download is never mistaken for a finished one by the
            # "already exists" check on a later run.
            partial_path = file_path + ".part"
            try:
                with requests.get(
                    full_download_url, stream=True, timeout=30
                ) as image_data:
                    image_data.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in image_data.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(partial_path, file_path)
                print(f" > 下载成功: {filename}")
            except (requests.exceptions.RequestException, OSError) as e:
                print(f" > 下载失败: {e}")
                try:
                    os.remove(partial_path)
                except OSError:
                    # Nothing was written, or it is already gone.
                    pass

            # Be polite to the server; only needed after an actual request.
            time.sleep(0.5)

        print(f"\n第 {page_num} 页处理完毕。")
        time.sleep(2)

    print("\n所有图片处理完毕。")
# Start the download only when this file is run as a script, not on import.
if __name__ == "__main__":
    download_images_with_descriptive_names()