作者:梦魇心芽_932 | 来源:互联网 | 2023-10-12 13:07
day学习总结与作业 import csv import requests from bs4 import BeautifulSoup import re headers = { 'user-agent': …
day学习总结与作业
import csv
import re

import requests
from bs4 import BeautifulSoup

# Fetch one page of listings from the Guazi car-list JSON endpoint, decode the
# obfuscated numeric fields, and write title / licence date / mileage /
# down payment / full price to files/ershouche.csv.

headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'
}
url = 'https://mapi.guazi.com/car-source/carList/pcList?minor=benz&sourceType=&ec_buy_car_list_ab=&location_city=&district_id=&tag=-1&license_date=&auto_type=&driving_type=&gearbox=&road_haul=&air_displacement=&emission=&car_color=&guobie=&bright_spot_config=&seat=&fuel_type=&order=&priceRange=0,-1&tag_types=&diff_city=&intention_options=&initialPriceRange=&monthlyPriceRange=&transfer_num=&car_year=&carid_qigangshu=&carid_jinqixingshi=&cheliangjibie=&page=1&pageSize=20&city_filter=12&city=12&guazi_city=12&qpres=&versionId=0.0.0.0&osv=Unknown&platfromSource=wap'

# Maps hex(code point) of a numeric character reference to the digit it
# stands for. All keys fall in the Unicode private-use area.
# NOTE(review): presumably this mirrors the site's current custom digit font;
# it will need updating if the site rotates the mapping.
table = {
    '0xe1d0': '7',
    '0xe325': '4',
    '0xe41d': '1',
    '0xe52e': '9',
    '0xe630': '2',
    '0xe76e': '8',
    '0xe891': '5',
    '0xe9ce': '0',
    '0xeaf2': '3',
    '0xec4c': '6',
    '0xf88a': '7',
}

# A decimal character reference such as "&#58928;"; the trailing ";" is
# optional so a reference at the very end of a string still matches.
_ENTITY = re.compile(r'&#(\d+);?')


def _decode_digits(text):
    """Return *text* with every ``&#NNNN;`` reference replaced via ``table``.

    Characters outside a reference are kept as they are, wherever the
    reference appears in the string.

    Raises:
        KeyError: if a reference's code point is not present in ``table``.
    """
    return _ENTITY.sub(lambda m: table[hex(int(m.group(1)))], text)


# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() surfaces HTTP errors before JSON parsing is attempted.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
result0 = response.json()['data']['postList']

red = []
for x in result0:
    title = x['title']
    license_date = x['license_date']
    new_road_haul = _decode_digits(x['road_haul'])
    new_first_pay = _decode_digits(x['first_pay'])
    if not new_first_pay:
        # An empty field is reported as "down payment not supported".
        new_first_pay = '不支持首付'
    new_buy_out_price = _decode_digits(x['price'])
    red.append([title, license_date, new_road_haul, new_first_pay, new_buy_out_price])

# The context manager guarantees the file is flushed and closed.
with open('files/ershouche.csv', 'w', encoding='utf-8', newline='') as f:
    result1 = csv.writer(f)
    result1.writerow(['标题', '年限', '里程', '首付', '一次付'])
    result1.writerows(red)
学习总结
# ---------------------------------------------------------------------------
# 1. requests through an HTTP proxy
# ---------------------------------------------------------------------------
import requests


def get_html(url):
    """Fetch *url* through a fixed HTTPS proxy, print the body and return it.

    Args:
        url: Address of the page to download.

    Returns:
        The response body decoded as text.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'
    }
    response = requests.get(url, headers=headers, proxies={'https': '36.25.226.139:4513'})
    print(response.text)
    return response.text


if __name__ == '__main__':
    url = 'https://movie.douban.com/top250'
    get_html(url)

# ---------------------------------------------------------------------------
# 2. selenium through an HTTP proxy
# ---------------------------------------------------------------------------
from selenium.webdriver import Chrome, ChromeOptions

options = ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Preference value 2 for managed_default_content_settings.images.
# NOTE(review): presumably this blocks image loading - confirm.
options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
options.add_argument('--proxy-server=http://122.6.202.214:4510')

b = Chrome(options=options)
b.get('https://movie.douban.com/top250')

# ---------------------------------------------------------------------------
# 3. requests with a login cookie
#
# Step 1: open the page in Chrome, log in, then refresh the page.
# Step 2: open DevTools for the page, find the request for the current page
#         under Network > All, and copy the cookie value from its
#         Request Headers.
# Step 3: when sending the request with requests, add the cookie key/value
#         pair to the headers dict.
# ---------------------------------------------------------------------------
import requests

headers = {
    'COOKIE': (
        '_zap=b7912ec1-8e24-4d72-81cd-8fa2a2b5f78f; '
        'd_c0="AECfz77blxSPTv9pswdcr3xSHE0eCEMj0Dc=|1646568427"; '
        '_xsrf=qy8EtuCtpOGgIUmA3g0qSi2edFqOUIBw; '
        '__snaker__id=kvuiOSuliPGkosHY; '
        '_9755xjdesxxd_=32; '
        'YD00517437729195%3AWM_TID=pJYUp8Detk5AUUUUFFc6upRtNshfmnQX; '
        'q_c1=51a6e1898c4d46a594865b3db1dd3e95|1647227093000|1647227093000; '
        'NOT_UNREGISTER_WAITING=1; '
        'gdxidpyhxdE=q6eDxuI%5CS11auZ9%2Be%5C%2BxIx%2F2cg96ULLZchHGSiL8EQvgYs9OmbeiyhhCXoa%5C%2BblJQfD%5CayzzA8oosyB%2FhPPMm7%2Fkd8W5prCdSYuppYUL5qdoyPdjsHcgA8pZgBquceXtX6di5Mu46C7dEKcHakVA7mxxjMZh%2Bre5j%2F4AQta4bulZ39y%5C%3A1647834647437; '
        'YD00517437729195%3AWM_NI=oe9bRvHOQTEjCNug5CHPzre%2BCdBGZr6dru1M9KaaTCyY5hZsiM2d%2FSXjfLKsl91VYoTN6x%2Fvc%2FWYtZWgHtA%2BkNpQdcQgt86C%2F1Vffl9dc8gqk08aF1%2Bp0LL%2BSZzsecFXbUI%3D; '
        'YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6ee99e96f86e9ada5f97cbaac8aa3c84f878a9a85aa7afbeeac8cc97e8c989f8bb32af0fea7c3b92aadbbf783d17e8c919fb5c8628192bb88b8498c90b693e82181bc8ea8d97af6bcbe96b447fb95af82d749bb87fb89d979ab93fe85ae599cb6aaadca79b0e7ffb7dc659892fa99ae7ea1f199baf75295b98895c421a992c0b2e76993b9bcd9b549edef9a87e568fbbcadd9e67ba9f0f882c53be9b000adcf219186b694e8698688aeb6cc37e2a3; '
        'captcha_session_v2=2|1:0|10:1647833755|18:captcha_session_v2|88:Q0FUYjlIemk4MklmZlA5ZlVOWDczbGlhYjRabzMxME1Fc0N6Szl4aGVTQU5JQkdGUklNWFRBVVZtU0hYQUVvLw==|cf267f149c0b9509d0e424579194d94fc6ec1f57567afd34050dc2a47f14bacc; '
        'captcha_ticket_v2=2|1:0|10:1647833769|17:captcha_ticket_v2|704:eyJ2YWxpZGF0ZSI6IkNOMzFfZ2pLT21aR2FUbHdXWFo2SG9wTUY1OWRZTG0tcG1vcGlfNUtrUkpzUThJSmNlV2VJdVVzdEhWNks4TVNBcVhobEZPUkZMcjUuc3N1QXpmUzBHbnIuSUNIdjlnb04tdjdoMlBydUtGSmdmV2FYUUZ2SE10dUl0RU40TzRFQS14dEhtMXJRLWg3UHdRblRMVkt1VG1kX09oekdab0pzUGxUWGFmUVF1NzRfYjc4di1QUmlQVUJMR0s5TEhvTGpDX2JUMF9jNFllNlM1SVFYSGgtMHAwV1ZMR2IwMWVqMS5FdEg1ZzZHRUZNZWMwLU8uMXk3YnY2dzlHdGNmNEE1a3E2Ry5EOWJSVlN4QjRWWmZodU1Gdk54N2lvNkZNUjJCdmw4Q254R1NUeUhtLXI1Yl9xeEZPS00wLnJONjkweW14cTQ5MHNjUzBKeHBwUE9lY05BSDZyNV9Vb3ZxZkFkR191aEQwOGc5ZUkwR3k5X005QVkub3gxOS5QdE1GanQwc1dtNERVbVZwQ3ZJV3R4V21nUS5Nd0tVTk92U3N1ZFlmcXJGMmhIWjUyTXZZUzFLc1RQaGlTNTRQWDRETElCdXQuNmU0ZUpzUFUyUzBWeUJZUXE5WFZSOEUwYzE5MEVYX2x5cEJ2MVdManBzVXJlV0t3d19GcGhTLnguVnlGMyJ9|22db975942e5d8c1799fa7c70120378eed6a34bb8bdd00afa3c5eccf92436296; '
        'z_c0=2|1:0|10:1647833781|4:z_c0|92:Mi4xaW5CWUdRQUFBQUFBUUpfUHZ0dVhGQ1lBQUFCZ0FsVk50VUFsWXdEWFYyXzVOZVRzbGJmbVg4Mk1GNF95bUYtLWd3|9d5f40b82ca91b89a903d5d50df74ed66850c2b3ecf03d86510300f5eb729d96; '
        'tst=r; '
        'SESSIONID=hbQ6zfAEKL3cjwOoewSAbkpnJKR4GtTVNCDhhjrorQ2; '
        'KLBRSID=0a401b23e8a71b70de2f4b37f5b4e379|1647833814|1647833734'
    ),
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36',
}
response = requests.get('https://www.zhihu.com/', headers=headers)
print(response.text)

# ---------------------------------------------------------------------------
# 4. selenium: reuse cookies stored in files/taobao.txt
#    (that file is produced by section 5 below, which must have been run at
#    least once beforehand)
# ---------------------------------------------------------------------------
import ast

from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

b = Chrome()
b.get('https://www.taobao.com/')

# The file holds the str() of a list of cookie dicts. literal_eval parses
# Python literals only, so unlike eval() it cannot execute code from the file.
with open('files/taobao.txt', encoding='utf-8') as cookie_file:
    cookie_list = ast.literal_eval(cookie_file.read())
for cookie in cookie_list:
    b.add_cookie(cookie)

# Load the page again now that the cookies have been added.
b.get('https://www.taobao.com/')

# find_element(By.ID, ...) replaces find_element_by_id, which newer Selenium
# releases no longer provide.
search = b.find_element(By.ID, 'q')
search.send_keys('雪糕')
search.send_keys(Keys.ENTER)

# ---------------------------------------------------------------------------
# 5. selenium: wait for the user, then save the browser's cookies
# ---------------------------------------------------------------------------
from selenium.webdriver import Chrome

b = Chrome()
b.get('https://www.taobao.com/')

# Block until the user presses Enter at the prompt.
input('是否完成:')

cookies = b.get_cookies()
with open('files/taobao.txt', 'w', encoding='utf-8') as cookie_file:
    cookie_file.write(str(cookies))