本次目标:http://www.qiqi.la/vod-detail-id-46194.html
目的:down 魔道祖师,实现
前期分析文件得到以下粗略步骤:
1 进入二级页面,找到 得到网址
2 访问 https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf
  需要带上协议头:
  Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf
  User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36
  返回另一个页面 response_1,从中得到文件标题
3 在 response_1 中得到:var main = "/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3";
  拼合网址:https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/index.m3u8?sign=b0023d8b455da27a4294b38c7815f7b3
  访问后得到返回结果:
  #EXTM3U
  #EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=800000,RESOLUTION=1080x608
  1000k/hls/index.m3u8
4 拼合 https://cn2.zuixinbo.com/20180710/4671_a5ef5a19/1000k/hls/index.m3u8
  带协议头访问:
  Referer: https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf
  User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36
  得到 .ts 下载文件路径,分段下载
5 拼合 .ts 文件
有空更新完整代码
2018-10-21
开始编写代码
在重新写代码的过程中,发现直接在播放页面就有全部的播放地址,只不过是用 UCS-2 的
编码转换了一下,我们需要把其转换成 ANSI 编码
2 OK,这下直接拿到播放地址,做一下格式化的工作,进行第2步解析,上面的第一步工作算是白费了一片心思
3 按照上面步骤依次完成,基本没问题
# -*- coding:utf-8 -*-
# @time:2018-10-21 14:43
# @Auther:1043453579@qq.com
"""Download the .ts segments of HLS episodes reached through share pages.

Per episode: fetch the share page, read its ``var main`` playlist path,
fetch the master playlist, fetch the media playlist it points to, then save
every listed .ts segment into a directory named after the page title.
"""
import os
import re
import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from urllib.parse import urljoin
from urllib.parse import urlsplit
from urllib.request import Request
from urllib.request import urlopen

static_url_1 = 'http://www.qiqi.la/vod-detail-id-46194.html'

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 60


def parse_episode_urls(lines):
    """Return the share-page URLs found in ``name$url#`` entries.

    Every whitespace-separated token of every line is examined, so both
    "one entry per line" and "several entries on one line" layouts work.
    Tokens without a ``$`` (and blank lines) are skipped.
    """
    urls = []
    for line in lines:
        for token in line.split():
            pieces = token.split('$')
            if len(pieces) < 2:
                continue
            url = pieces[1].split('#')[0]
            if url:
                urls.append(url)
    return urls


class A(object):
    """Downloader for one or more share-page episodes."""

    # Message returned by take_middle_text() for bad arguments or a missing
    # start marker (kept as a return value for backward compatibility).
    NOT_FOUND = '传参错误或未找到传参文本'

    def __init__(self, url, e=15):
        """Set up request headers and the output root.

        ``url`` is accepted but not used by any method; ``e`` is stored on
        the instance and likewise not read anywhere in this class.
        """
        self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
        self.path = os.getcwd()
        self.e = e
        # Only used as a fallback base when no referring page is known.
        self.static_url = 'https://cn2.zuixinbo.com'

    def _fetch(self, url, referer=''):
        """Return the raw response body of ``url`` (optionally with a Referer)."""
        headers = self.header.copy()
        if referer:
            headers['Referer'] = referer
        with urlopen(Request(url=url, headers=headers), timeout=REQUEST_TIMEOUT) as resp:
            return resp.read()

    def _found(self, value):
        """Return True when ``value`` is a real take_middle_text() hit."""
        return isinstance(value, str) and bool(value) and value != self.NOT_FOUND

    @staticmethod
    def _playlist_entries(text):
        """Return the non-empty, non-comment lines of an m3u8 playlist."""
        entries = []
        for line in text.splitlines():  # splitlines() also handles CRLF
            line = line.strip()
            if line and not line.startswith('#'):
                entries.append(line)
        return entries

    def num_of_e(self, url_2):
        """Fetch the share page and extract its title and ``var main`` path.

        Returns a dict with keys ``var_main``, ``referer`` and ``标题``.
        Raises ValueError when the page has no ``var main`` assignment.
        """
        res = self._fetch(url_2).decode()
        # NOTE(review): the published listing passed an empty start marker
        # here, which can only yield the error message; the <title> markers
        # were presumably lost when the code was published - confirm.
        title = self.take_middle_text(res, '<title>', '</title>')
        if not self._found(title) or not title.strip():
            # Fall back to the share id so episodes never share a directory.
            title = url_2.rstrip('/').rsplit('/', 1)[-1]
        match = re.search(r'var main = "(.*?)";', res)
        if match is None or not match.group(1):
            raise ValueError('no "var main" playlist path found in ' + url_2)
        return {'var_main': match.group(1), 'referer': url_2, '标题': title.strip()}

    def open_3(self, url, referer='', **kwargs):
        """Fetch the master playlist and pick the media playlist entry.

        ``url`` is the ``var main`` path; it is resolved against ``referer``
        (the share page) so episodes hosted on other domains work, falling
        back to ``self.static_url`` when no referer is given.
        Returns ``{'url': [entry], 'regerer1': master_playlist_url}``.
        """
        url = urljoin(referer or self.static_url, url)
        res = self._fetch(url, referer).decode()
        variant = self.take_middle_text(res, '1080x608', '.m3u8')
        if self._found(variant):
            # Keep only the last token: anything before it is the remainder
            # of the #EXT-X-STREAM-INF attribute line.
            entries = (variant + '.m3u8').split()[-1:]
        else:
            # No 1080x608 variant: use the first playlist listed instead.
            entries = self._playlist_entries(res)[:1]
        if not entries:
            raise ValueError('no media playlist found in ' + url)
        return {'url': entries, 'regerer1': url}

    def open_4(self, url, referer1='', **kwargs):
        """Fetch the media playlist and list its segment entries.

        ``url`` is the entry list returned by open_3 (a plain string is also
        accepted); its first item is resolved relative to ``referer1``.
        Returns ``{'ts_list': [...], 'url': media_playlist_url}``.
        """
        entries = [url] if isinstance(url, str) else list(url)
        if not entries:
            raise ValueError('empty media playlist entry')
        url = urljoin(referer1, entries[0])
        print(url)
        res = self._fetch(url, referer1).decode()
        return {'ts_list': self._playlist_entries(res), 'url': url}

    def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
        """Extract text located relative to the first ``txt_s`` in ``txt``.

        * ``txt_e`` given: return the text between ``txt_s`` and the next
          ``txt_e`` after it, or False when ``txt_e`` does not follow.
        * ``seeks`` given: return the ``seeks`` characters before ``txt_s``.
        * ``seeke`` given: return the ``seeke`` characters after ``txt_s``.

        Returns ``A.NOT_FOUND`` when none of the three is supplied, when
        ``txt_s`` is absent, or when the arguments are not usable strings.
        """
        if not (txt_e or seeks or seeke):
            return self.NOT_FOUND
        try:
            s_1 = txt.find(txt_s)
            if s_1 == -1:
                return self.NOT_FOUND
            start = s_1 + len(txt_s)
            if txt_e:
                # Search after the start marker; an earlier occurrence of the
                # end marker must not produce an empty or reversed slice.
                s_2 = txt.find(txt_e, start)
                if s_2 == -1:
                    return False
                return txt[start:s_2]
            if seeks:
                # Clamp so a large seeks does not wrap to a negative index.
                return txt[max(s_1 - seeks, 0):s_1]
            return txt[start:start + seeke]
        except (AttributeError, TypeError):
            return self.NOT_FOUND

    def down_ts(self, dict, path_1):
        """Download every segment of a media playlist into ``path_1``.

        ``dict`` is the mapping returned by open_4 (keys ``ts_list`` and
        ``url``). Existing files are skipped; a failed segment is reported
        and the loop continues with the next one.
        """
        playlist_url = dict['url']
        for i in dict['ts_list']:
            print(path_1, '这里是path_1')
            # Use only the file name so query strings or sub-directories in
            # the playlist entry cannot produce an unusable local path.
            name = os.path.basename(urlsplit(i).path)
            if not name:
                continue
            path = os.path.join(path_1, name)
            print(path, '这里是path_ts文件网址')
            if os.path.exists(path):
                print('已存在,跳过')
                continue
            try:
                res = self._fetch(urljoin(playlist_url, i))
                if not res:
                    raise ValueError('empty response')
                # Write to a temporary name first: an interrupted download
                # must not be mistaken for a finished file on the next run.
                tmp_path = path + '.part'
                with open(tmp_path, 'wb') as f:
                    f.write(res)
                os.replace(tmp_path, path)
                print('成功写入一条')
            except Exception as e:
                print('写入失败', e)

    def main(self, url):
        """Download one episode given its share-page URL."""
        dict_1 = self.num_of_e(url)
        dict_2 = self.open_3(dict_1['var_main'], dict_1['referer'])
        dict_3 = self.open_4(dict_2['url'], dict_2['regerer1'])
        # Replace characters that are not allowed in directory names.
        title = re.sub(r'[\\/:*?"<>|\r\n]+', '_', dict_1['标题']).strip()
        path = os.path.join(self.path, title)
        # exist_ok avoids a race between worker processes.
        os.makedirs(path, exist_ok=True)
        self.down_ts(dict_3, path)


if __name__ == '__main__':
    a_1 = A(static_url_1, 15)
    with open('2.txt', 'r', encoding='utf8') as f:
        episode_urls = parse_episode_urls(f)
    with ProcessPoolExecutor(2) as ex:
        # Submit everything first; waiting on each future right after
        # submitting it would run the episodes one at a time.
        pending = [(u, ex.submit(a_1.main, u)) for u in episode_urls]
        for u, fut in pending:
            try:
                print(fut.result())
            except Exception as e:
                print('任务失败', u, e)
2018-10-30
# -*- coding:utf-8 -*-
# @time:2018-10-21 14:43
# @Auther:1043453579@qq.com
"""Download the .ts segments of HLS episodes reached through share pages.

Second version: requests go through a retrying helper (``down_1``), failed
URLs are appended to ``3.txt``, and the site root is derived from each
share-page URL instead of being hard-coded.
"""
import os
import re
import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from urllib.parse import urljoin
from urllib.parse import urlsplit
from urllib.request import Request
from urllib.request import urlopen

static_url_1 = 'http://www.qiqi.la/vod-detail-id-46194.html'


def site_root(url):
    """Return ``scheme://host`` of ``url``.

    Original author's note on this script: "the bug is in the submitted
    page URLs" - presumably because episodes are hosted on several domains;
    confirm. Parsing the URL avoids cutting at the first "com" substring.
    """
    parts = urlsplit(url)
    return parts.scheme + '://' + parts.netloc


def parse_episode_urls(lines):
    """Return the share-page URLs found in ``name$url#`` entries.

    Every whitespace-separated token of every line is examined, so both
    "one entry per line" and "several entries on one line" layouts work.
    Tokens without a ``$`` (and blank lines) are skipped.
    """
    urls = []
    for line in lines:
        for token in line.split():
            pieces = token.split('$')
            if len(pieces) < 2:
                continue
            url = pieces[1].split('#')[0]
            if url:
                urls.append(url)
    return urls


class A(object):
    """Downloader for one or more share-page episodes."""

    # Message returned by take_middle_text() for bad arguments or a missing
    # start marker (kept as a return value for backward compatibility).
    NOT_FOUND = '传参错误或未找到传参文本'

    def __init__(self):
        """Set up request headers, the output root and an empty site root."""
        self.header = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}
        self.path = os.getcwd()
        # Filled in by main() from the share-page URL.
        self.static_url = ''
        # Optional Redis client read by get_proxy(); the original kept
        # ``redis.Redis(host='127.0.0.1', port=6379, db=0)`` commented out.
        self.r = None

    def get_proxy(self):
        """Return a proxy mapping built from a random key of ``self.r``.

        Raises RuntimeError when no client has been assigned to ``self.r``.
        """
        client = getattr(self, 'r', None)
        if client is None:
            raise RuntimeError('no Redis client configured on self.r')
        return {'http': client.randomkey().decode()}

    def _headers(self, referer=''):
        """Return a fresh header dict, optionally with a Referer.

        A copy is returned so a Referer never leaks into ``self.header``
        and from there into unrelated later requests.
        """
        headers = self.header.copy()
        if referer:
            headers['Referer'] = referer
        return headers

    def _found(self, value):
        """Return True when ``value`` is a real take_middle_text() hit."""
        return isinstance(value, str) and bool(value) and value != self.NOT_FOUND

    @staticmethod
    def _playlist_entries(text):
        """Return the non-empty, non-comment lines of an m3u8 playlist."""
        entries = []
        for line in text.splitlines():  # splitlines() also handles CRLF
            line = line.strip()
            if line and not line.startswith('#'):
                entries.append(line)
        return entries

    def down_1(self, url, referer='', code=True, max_retries=None):
        """Fetch ``url``, retrying until a non-empty body is received.

        ``code=True`` decodes the body to text; otherwise bytes are
        returned. Each failure is printed, appended to ``3.txt`` and
        followed by a 10 second pause. ``max_retries=None`` (the default)
        retries forever, as before; an integer re-raises the last error
        once that many retries have failed.
        """
        headers = self._headers(referer)
        failures = 0
        while True:
            # proxy = self.get_proxy()  # a proxy could be configured here
            try:
                with urlopen(Request(url=url, headers=headers), timeout=60) as resp:
                    res = resp.read()
                if code:
                    res = res.decode()
                if not res:
                    raise ValueError('empty response')
            except Exception:
                failures += 1
                print('请求失败', url)
                with open('3.txt', 'a+') as f:
                    f.write(url)
                    f.write('\n')
                if max_retries is not None and failures > max_retries:
                    raise
                time.sleep(10)
            else:
                time.sleep(1)  # be polite between successful requests
                return res

    def num_of_e(self, url_2):
        """Fetch the share page and extract its title and ``var main`` path.

        Returns a dict with keys ``var_main``, ``referer`` and ``标题``.
        Raises ValueError when the page has no ``var main`` assignment.
        """
        res = self.down_1(url_2)
        # NOTE(review): the published listing passed an empty start marker
        # here, which can only yield the error message; the <title> markers
        # were presumably lost when the code was published - confirm.
        title = self.take_middle_text(res, '<title>', '</title>')
        if not self._found(title) or not title.strip():
            # Fall back to the share id so episodes never share a directory.
            title = url_2.rstrip('/').rsplit('/', 1)[-1]
        match = re.search(r'var main = "(.*?)";', res)
        if match is None or not match.group(1):
            raise ValueError('no "var main" playlist path found in ' + url_2)
        return {'var_main': match.group(1), 'referer': url_2, '标题': title.strip()}

    def open_3(self, url, referer='', **kwargs):
        """Fetch the master playlist and pick the media playlist entry.

        ``url`` is the ``var main`` path, resolved against
        ``self.static_url`` (or ``referer`` when the site root is unset).
        Returns ``{'url': [entry], 'regerer1': master_playlist_url}``.
        """
        url = urljoin(self.static_url or referer, url)
        res = self.down_1(url, referer=referer)
        variant = self.take_middle_text(res, '1080x608', '.m3u8')
        if self._found(variant):
            # Keep only the last token: anything before it is the remainder
            # of the #EXT-X-STREAM-INF attribute line.
            entries = (variant + '.m3u8').split()[-1:]
        else:
            # No 1080x608 variant: use the first playlist listed instead.
            entries = self._playlist_entries(res)[:1]
        if not entries:
            raise ValueError('no media playlist found in ' + url)
        return {'url': entries, 'regerer1': url}

    def open_4(self, url, referer1='', **kwargs):
        """Fetch the media playlist and list its segment entries.

        ``url`` is the entry list returned by open_3 (a plain string is also
        accepted); its first item is resolved relative to ``referer1``.
        Returns ``{'ts_list': [...], 'url': media_playlist_url}``.
        """
        entries = [url] if isinstance(url, str) else list(url)
        if not entries:
            raise ValueError('empty media playlist entry')
        url = urljoin(referer1, entries[0])
        print(url)
        res = self.down_1(url, referer=referer1)
        return {'ts_list': self._playlist_entries(res), 'url': url}

    def take_middle_text(self, txt, txt_s, txt_e='', seeks=0, seeke=0):
        """Extract text located relative to the first ``txt_s`` in ``txt``.

        * ``txt_e`` given: return the text between ``txt_s`` and the next
          ``txt_e`` after it, or False when ``txt_e`` does not follow.
        * ``seeks`` given: return the ``seeks`` characters before ``txt_s``.
        * ``seeke`` given: return the ``seeke`` characters after ``txt_s``.

        Returns ``A.NOT_FOUND`` when none of the three is supplied, when
        ``txt_s`` is absent, or when the arguments are not usable strings.
        """
        if not (txt_e or seeks or seeke):
            return self.NOT_FOUND
        try:
            s_1 = txt.find(txt_s)
            if s_1 == -1:
                return self.NOT_FOUND
            start = s_1 + len(txt_s)
            if txt_e:
                # Search after the start marker; an earlier occurrence of the
                # end marker must not produce an empty or reversed slice.
                s_2 = txt.find(txt_e, start)
                if s_2 == -1:
                    return False
                return txt[start:s_2]
            if seeks:
                # Clamp so a large seeks does not wrap to a negative index.
                return txt[max(s_1 - seeks, 0):s_1]
            return txt[start:start + seeke]
        except (AttributeError, TypeError):
            return self.NOT_FOUND

    def down_ts(self, dict, path_1):
        """Download every segment of a media playlist into ``path_1``.

        ``dict`` is the mapping returned by open_4 (keys ``ts_list`` and
        ``url``). Existing files are skipped; a failed segment is printed,
        logged to ``3.txt`` and the loop continues after a 5 second pause.
        """
        playlist_url = dict['url']
        # The original sent a Referer here only because earlier requests had
        # mutated self.header; send one explicitly instead.
        headers = self._headers(playlist_url)
        for i in dict['ts_list']:
            # Use only the file name so query strings or sub-directories in
            # the playlist entry cannot produce an unusable local path.
            name = os.path.basename(urlsplit(i).path)
            if not name:
                continue
            path = os.path.join(path_1, name)
            if os.path.exists(path):
                print('已存在,跳过', i)
                continue
            segment_url = urljoin(playlist_url, i)
            try:
                with urlopen(Request(url=segment_url, headers=headers), timeout=60) as resp:
                    res = resp.read()
                time.sleep(1)
                if not res:
                    raise ValueError('empty response')
                # Write to a temporary name first: an interrupted download
                # must not be mistaken for a finished file on the next run.
                tmp_path = path + '.part'
                with open(tmp_path, 'wb') as f:
                    f.write(res)
                os.replace(tmp_path, path)
                print('成功写入一条', i)
            except Exception as e:
                stamp = '-'.join([str(part) for part in time.localtime()[0:6]])
                with open('3.txt', 'a+') as f:
                    # str(e): concatenating the exception object itself
                    # raised TypeError inside this handler.
                    f.write(stamp + '###' + str(e) + '$$$' + segment_url)
                    f.write('\n')
                print('写入失败', i, e)
                time.sleep(5)

    def main(self, url):
        """Download one episode given its share-page URL."""
        self.static_url = site_root(url)
        dict_1 = self.num_of_e(url)
        dict_2 = self.open_3(dict_1['var_main'], dict_1['referer'])
        dict_3 = self.open_4(dict_2['url'], dict_2['regerer1'])
        # Replace characters that are not allowed in directory names.
        title = re.sub(r'[\\/:*?"<>|\r\n]+', '_', dict_1['标题']).strip()
        path = os.path.join(self.path, title)
        # exist_ok avoids a race between worker processes.
        os.makedirs(path, exist_ok=True)
        self.down_ts(dict_3, path)


if __name__ == '__main__':
    a_1 = A()
    with open('2.txt', 'r', encoding='utf8') as f:
        episode_urls = parse_episode_urls(f)
    with ProcessPoolExecutor(3) as ex:
        pending = [(u, ex.submit(a_1.main, u)) for u in episode_urls]
        # Collect every future so a failure in a worker is reported instead
        # of being silently dropped.
        for u, fut in pending:
            try:
                fut.result()
            except Exception as e:
                print('任务失败', u, e)
第01集$https://cn2.zuixinbo.com/share/722caafb4825ef5d8670710fa29087cf# 第02集$https://cn2.zuixinbo.com/share/fbad540b2f3b5638a9be9aa6a4d8e450# 第03集$https://v-xunlei.com/share/c457d7ae48d08a6b84bc0b1b9bd7d474# 第04集$https://v-xunlei.com/share/8db1d4a631a6e9a24e2c0e842e1f1772# 第05集$https://v-xunlei.com/share/197f76fe309657064dbec74d9eea4be4# 第06集$https://v-xunlei.com/share/92b70a527191ca64ca2df1cc32142646# 第07集$https://v-xunlei.com/share/abc99d6b9938aa86d1f30f8ee0fd169f# 第08集$https://v-xunlei.com/share/22cdb13a83f73ccd1f79ffaf607b0621# 第09集$https://v-xunlei.com/share/aceacd5df18526f1d96ee1b9714e95eb# 第10集$https://v-6-cn.com/share/075b051ec3d22dac7b33f788da631fd4# 第11集$https://v-6-cn.com/share/4670c07872d5314c6ad6ffa633d4a059# 第12集$https://v-xunlei.com/share/2bba9f4124283edd644799e0cecd45ca# 第13集$https://v-cntv-cn.com/share/d87aa42cd08ba8612664a73dbdb64221# 第14集$https://v-cntv-cn.com/share/63ceea56ae1563b4477506246829b386# 第15集$https://v-cntv-cn.com/share/e8a69bf65aefc23d0f360ab695e9eac7