Author: Chinaexpoinfo | Source: Internet | 2024-12-13 02:08
When writing multiprocess code in Python, especially for large data sets or highly concurrent requests, the program may fail to exit normally after all the work is done. This hang typically shows up when the number of processes is large and individual tasks are slow or error-prone, so a few straggling workers keep the pool from ever joining. This article describes an effective way to work around the problem and walks through a concrete example.
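The core of the workaround is to avoid blocking on a plain pool.join(). Instead, submit every task with apply_async(), poll the returned AsyncResult objects, and once a chosen fraction of them report ready(), forcibly terminate the pool so the remaining stragglers cannot keep the process alive. Below is a minimal sketch of the idea; the slow_task function and the 95% threshold are illustrative assumptions, not part of the original script.

import time
from multiprocessing import Pool

def slow_task(n):
    # Stand-in for real work; occasionally a task may hang or run very long.
    time.sleep(n % 3)
    return n

if __name__ == '__main__':
    pool = Pool(4)
    results = [pool.apply_async(slow_task, args=(i,)) for i in range(100)]
    pool.close()
    while True:
        time.sleep(1)
        done = sum(1 for r in results if r.ready())
        # Stop waiting once more than 95% of the tasks have finished.
        if done > len(results) * 0.95:
            break
    pool.terminate()  # kill any workers that are still stuck
    pool.join()

The full script below applies the same pattern to batch HTTP requests.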
import argparse
import datetime
import getpass
import time

import requests
from multiprocessing import Pool
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Suppress the InsecureRequestWarning raised because verify=False is used below
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
def parse_arguments():
    """Parse command-line arguments."""
    parser = argparse.ArgumentParser(description='Batch URL request script')
    parser.add_argument('-f', dest='input_file', type=str, required=True, help='input file containing the URL list')
    parser.add_argument('-o', dest='output_file', type=str, default='results.txt', help='output file for the results')
    parser.add_argument('-p', dest='proxy_server', type=str, default='', help='proxy server address')
    parser.add_argument('-n', dest='num_processes', type=int, default=1, help='number of worker processes')
    args = parser.parse_args()
    return args.input_file, args.output_file, args.proxy_server, args.num_processes
def configure_proxy(proxy_server):
    """Build the proxies dict, prompting for credentials if a proxy is given."""
    if not proxy_server:
        return {}
    username = input('Proxy username: ')
    password = getpass.getpass('Proxy password: ')
    http_proxy = f'http://{username}:{password}@{proxy_server}'
    https_proxy = f'https://{username}:{password}@{proxy_server}'
    return {
        'http': http_proxy,
        'https': https_proxy
    }
def read_urls(file_path):
    """Read the URL list from a file, one URL per line."""
    with open(file_path, 'r') as file:
        urls = file.readlines()
    return [url.strip() for url in urls]
def send_request(url, proxies=None):
    """Send an HTTP GET request and return the URL with its status code (or error)."""
    headers = {'User-Agent': 'curl/3.03', 'Connection': 'close'}
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=15, verify=False)
        return f'{url} {response.status_code}'
    except Exception as e:
        return f'{url} {e}'
def main():
    start_time = datetime.datetime.now()
    input_file, output_file, proxy_server, num_processes = parse_arguments()
    urls = read_urls(input_file)
    proxies = configure_proxy(proxy_server)
    pool = Pool(num_processes)
    print(f'Total URLs: {len(urls)}')

    def log_result(result):
        # The callback runs in the parent process, so appending to the file here is safe.
        with open(output_file, 'a+') as file:
            file.write(result + '\n')

    results = [pool.apply_async(send_request, args=(url, proxies), callback=log_result) for url in urls]
    pool.close()
    # Poll the async results instead of blocking on pool.join(): once more than
    # 95% of the tasks have finished, give up on the stragglers so a few hung
    # requests cannot keep the program from exiting.
    while True:
        try:
            time.sleep(60)
            completed_count = sum(1 for result in results if result.ready())
            if completed_count > len(results) * 0.95:
                break
        except Exception as e:
            print(f'Worker process error: {e}')
    pool.terminate()
    pool.join()
    end_time = datetime.datetime.now()
    print(f'Start time: {start_time}')
    print(f'End time: {end_time}')
    print(f'Total elapsed: {end_time - start_time}')
    print(f'Results saved to: {output_file}')

if __name__ == '__main__':
    main()
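The key design choice is that pool.close() followed by an indefinite wait can hang forever if even one worker never returns; polling result.ready() against a completion threshold and then calling pool.terminate() guarantees the script exits. The 95% threshold means up to 5% of URLs may go unreported, so it can be tuned per workload. Assuming the script is saved as batch_request.py (the file name is illustrative), it could be invoked like this:

python batch_request.py -f urls.txt -o results.txt -n 20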