Author: 大美女慧慧慧 | Source: Internet | 2023-09-24 16:20
New house details
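The script below drives Chrome through Selenium, collects the detail-page links from one listing page of nanjing.newhouse.fang.com, opens a listing's detail page to pull the district, the city and the map coordinates, and appends the results to a JSON Lines file. It is written against the Selenium 3 API (executable_path, chrome_options, find_element_by_xpath); a Selenium 4 variant of the setup is sketched after the script.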
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
import json
from datetime import datetime
import re

option = webdriver.ChromeOptions()
# Suppress the automation banner and some useless log output
option.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
b = webdriver.Chrome(executable_path=r"D:\chrome_driver_win32\chromedriver.exe", chrome_options=option)

# Open page 1 of the Nanjing new-house listings (b9{num} is the page number in the URL)
num = 1
base_urls = "https://nanjing.newhouse.fang.com/house/s/b9{}/".format(num)
b.get(base_urls)

# Collect the detail-page link of every listing on the current page
name = b.find_elements_by_xpath('//*[@class="nl_con clearfix"]/ul/li/div/div[1]/a')
house_lst = []
for i in name:
    href = i.get_attribute('href')
    house_lst.append(href)

data_list = []
for url in house_lst:
    b.get(url)
    data = {}
    # Get the listing details
    quyu = b.find_element_by_xpath(
        '//div[@class="br_left"]//ul[@class="tf f12"]//li[3]/a').text  # first-level district
    data['subarea'] = quyu[:-2]  # string slice: drop the last two characters
    data['area'] = b.find_element_by_xpath('//div[@class="s2"]/div/a').text  # current city
    try:
        # Attributes on the "楼盘详情" (property details) page
        fangyuan_url = b.find_element_by_xpath(
            "//*[@class='main_1200 tf']//div[@class='cxfnav']//a[contains(text(),'楼盘详情')]")
        href1 = fangyuan_url.get_attribute('href')
        b.get(href1)
        main_items = b.find_elements_by_xpath(
            '//div[@class="main_1200 tf"]//div[@class="main_1200"]//div[@class="main-cont clearfix"]'
            '//div[@class="main-left"]//div[@class="main-item"]')
        for i in main_items:
            # print(i.find_element_by_xpath(".//h3").text)  # .// means "relative to the current node"
            nodes1 = i.find_elements_by_xpath('.//ul//li')
            for n in nodes1:
                print(n.text)
            print('-' * 50)
        # Location and surroundings: the map iframe's src carries the coordinates
        dingwei_url = b.find_element_by_xpath('//div[@class="mapbox_dt"]/iframe').get_attribute(
            "src")  # get the location link
        b.get(dingwei_url)
        sound_code = b.page_source  # page source of the map frame
        re_search = re.search(r'"mapx":"(.*?)","mapy":"(.*?)"', sound_code, re.DOTALL)  # property coordinates: regex-match the numbers after "mapx"/"mapy"
        data['housecoord'] = re_search.group(2) + "," + re_search.group(1)
    except Exception as e:
        pass
    data_list.append(data)
    break  # only the first listing is scraped here; remove this to crawl the whole page

print(data_list)
with open('详情(南京).jsonlines', 'a', encoding='utf8') as f:
    for data in data_list:
        json.dump(data, f, ensure_ascii=False)
        f.write('\n')
b.quit()
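Note that the executable_path / chrome_options constructor arguments and the find_element_by_xpath / find_elements_by_xpath locator methods were removed in Selenium 4. A minimal sketch of the equivalent setup and link collection under Selenium 4, assuming the same chromedriver path and listing-page XPath as above, could look like this:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Same Chrome options as above: hide the automation banner and extra logging
option = webdriver.ChromeOptions()
option.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

# Selenium 4 passes the driver path through a Service object and uses options=
b = webdriver.Chrome(service=Service(r"D:\chrome_driver_win32\chromedriver.exe"),
                     options=option)

b.get("https://nanjing.newhouse.fang.com/house/s/b91/")

# find_elements(By.XPATH, ...) replaces find_elements_by_xpath(...)
links = [a.get_attribute('href')
         for a in b.find_elements(By.XPATH, '//*[@class="nl_con clearfix"]/ul/li/div/div[1]/a')]
print(links)
b.quit()

The rest of the script carries over unchanged apart from swapping every find_element_by_xpath / find_elements_by_xpath call for find_element(By.XPATH, ...) / find_elements(By.XPATH, ...).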