# Author: Yyao | Source: Internet | 2023-09-17 15:23
# -*- coding:UTF-8 -*- import requests import time import os from hashlib import md5 def get_page(off
# -*- coding:UTF-8 -*-
import requests
import time
import os
from hashlib import md5

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the crawl indefinitely.
_REQUEST_TIMEOUT = 10


def get_page(offset):
    """Fetch one page of search results from the Toutiao search endpoint.

    Args:
        offset: Value sent as the ``offset`` query parameter (the driver
            below steps it by 20, matching ``count``).

    Returns:
        The decoded JSON body when the server answers with status 200,
        otherwise ``None`` (non-200 status, request failure or timeout, or a
        body that cannot be decoded as JSON).
    """
    params = {
        'aid': '24',
        'app_name': 'web_search',
        'offset': offset,
        'format': 'json',
        'keyword': '街拍',
        'autoload': 'true',
        'count': '20',
        'en_qc': '1',
        'cur_tab': '1',
        'from': 'search_tab',
        'pd': 'synthesis',
        # Millisecond timestamp of the request.
        'timestamp': int(time.time() * 1000),
    }
    url = 'http://www.toutiao.com/search_content/?'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/75.0.3770.142 Safari/537.36',
        'referer': 'https://www.toutiao.com/search/?keyword=%E8%A1%97%E6%8B%8D',
        'x-requested-with': 'XMLHttpRequest',
    }
    try:
        response = requests.get(
            url=url, params=params, headers=headers, timeout=_REQUEST_TIMEOUT
        )
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError):
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that is not valid JSON. Either way this page is
        # treated as unavailable rather than aborting the whole crawl.
        return None
    return None


def get_images(json):
    """Yield one record per image found in a search response.

    Args:
        json: Decoded response as returned by ``get_page``; ``None`` or an
            empty mapping yields nothing. (The parameter name is kept for
            backward compatibility with keyword callers.)

    Yields:
        dict: ``{'image': <the entry's 'url' value or None>, 'title': <item title>}``
        for every entry of every item that has both a ``'title'`` and a
        non-empty ``'image_list'``.
    """
    if not json:
        # get_page() returns None on failure; nothing to extract.
        return
    for item in json.get('data') or []:
        if 'title' not in item or 'image_list' not in item:
            continue
        images = item['image_list']
        if not images:
            # Covers both an empty list and a null image_list.
            continue
        title = item['title']
        for image in images:
            yield {'image': image.get('url'), 'title': title}


def save_image(offset, item):
    """Download one image and store it as ``picture<offset>/<md5>.jpg``.

    The file name is the MD5 hex digest of the downloaded bytes, so identical
    images are written only once per directory.

    Args:
        offset: Page offset; used to name the target directory.
        item: Record produced by ``get_images`` (reads the ``'image'`` key).

    Returns:
        None. Prints ``'Failed to Save Image'`` when the URL is missing or the
        request fails, and ``'Already Downloaded <path>'`` when the file is
        already present. A non-200 response is skipped silently.
    """
    folder = 'picture' + str(offset)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(folder, exist_ok=True)

    raw_url = item.get('image')
    if not raw_url:
        print('Failed to Save Image')
        return
    # Only prepend a scheme when the URL does not already carry one, so
    # absolute URLs are not mangled into "http:http://...".
    if raw_url.startswith(('http://', 'https://')):
        image_url = raw_url
    else:
        image_url = 'http:' + raw_url

    try:
        response = requests.get(image_url, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        print('Failed to Save Image')
        return
    if response.status_code != 200:
        return

    file_path = '{0}/{1}.{2}'.format(
        folder, md5(response.content).hexdigest(), 'jpg'
    )
    if os.path.exists(file_path):
        print('Already Downloaded', file_path)
        return
    with open(file_path, 'wb') as f:
        f.write(response.content)


def main():
    """Crawl result pages at offsets 0, 20, ..., 100 and save every image."""
    for offset in range(0, 120, 20):
        page = get_page(offset)
        # get_images() tolerates a failed (None) page and yields nothing.
        for item in get_images(page):
            save_image(offset, item)


if __name__ == '__main__':
    main()