import base64
import re

import lxml.html
import requests
from fake_useragent import UserAgent

# Base64 alphabet followed by the "=" padding character. f1 now delegates to
# the standard library instead of indexing this table; the name is kept so
# that anything else referring to it keeps working.
encoderchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="


# Python rewrite of the algorithm that generates the session cookie values.
def f1(a: str) -> str:
    """Return the Base64 encoding of *a*, one byte per character.

    Only the low 8 bits of each character's code point are encoded, which is
    what the original table-driven loop did through its bit masks. The output
    uses the standard alphabet with "=" padding.

    Args:
        a: Text to encode; an empty string yields an empty string.

    Returns:
        The Base64 text for the byte sequence derived from *a*.
    """
    # Masking with 0xFF keeps characters above U+00FF from raising and
    # reproduces the truncation performed by the original bit arithmetic.
    raw = bytes(ord(ch) & 0xFF for ch in a)
    return base64.b64encode(raw).decode("ascii")


# Shared HTTP session used for every request made by this script.
s = requests.session()
# Fetch the entry page to obtain the "session" cookie issued by the server.
result = s.get('http://datamining.comratings.com/exam')
session_id = result.cookies.get_dict()['session']

# Build the cookie header by hand: c1 and c2 are derived from the session id
# with f1 (c1 from characters 1..3, c2 from the whole id).
r_cookies = 'session={};c1={}; c2={}; path=/'.format(
    session_id, f1(session_id[1:4]), f1(session_id)
)
headers = {
    "User-Agent": UserAgent().random,
    "COOKIE": r_cookies,
}
ss = s.get('http://datamining.comratings.com/exam3', headers=headers).text

# Cut the body content out of the page with a regex.
# NOTE(review): the tag text inside this pattern was lost from the source;
# "<body>...</body>" is reconstructed from the comment above -- confirm.
pattern = re.compile(r'<body>(.*?)</body>', re.DOTALL)
result = pattern.findall(ss)

# Split the body into its fragments (eleven expected: one leading fragment
# followed by ten IP fragments).
# NOTE(review): the separator was lost from the source; "<br>" is assumed --
# confirm against the live page (it may be "<br/>" or "<br />").
result1 = result[0].split('<br>')

# Contents of the style element.
# NOTE(review): pattern reconstructed as "<style>...</style>" -- confirm.
sty = re.compile(r'<style>(.*?)</style>', re.DOTALL)
sty1 = sty.findall(ss)

# Names of the (four expected) selectors defined inside the style element.
sty2 = re.compile(r'.(.*?){')
sty22 = sty2.findall(sty1[0])

# Compiled once here instead of on every line of every fragment.
ip_regex = re.compile(r'\d+')

# Collected output: the leading fragment followed by the ten IP addresses.
data = []
data.append(result1[0])

for data_res in result1[1:]:  # each of the ten IP fragments
    line_array = data_res.split('\n')  # the individual lines of the fragment
    ip_data = []  # the four numbers that make up one IP address
    for line_str in line_array:
        # Skip lines that mention either of the first two style names or the
        # word 'none'; the remaining lines carry the real digits.
        if (
            sty22[0] not in line_str
            and sty22[1] not in line_str
            and 'none' not in line_str
        ):
            ip_array = ip_regex.findall(line_str)
            if ip_array:
                ip_data.append(ip_array[0])
    # Join the four selected numbers into a dotted IP address; indexing
    # raises IndexError if fewer than four numbers were found.
    ip_str = f"{ip_data[0]}.{ip_data[1]}.{ip_data[2]}.{ip_data[3]}"
    data.append(ip_str)

for i in data:
    print(i)