# 爬虫相关知识请阅读我的其他文章 (For background on web scraping, see my other articles.)
import re
import xlwt
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) '
                  'Gecko/20100101 Firefox/56.0',
}

# Spreadsheet layout: the position in this tuple is the column index.
COLUMN_HEADERS = (
    '书名', '作者', '译者', '出版单位', '出版时间', '定价',
    '豆瓣评分', '评价人数', '一句话',
)
(COL_TITLE, COL_AUTHOR, COL_TRANSLATOR, COL_PUBLISHER, COL_PUB_DATE,
 COL_PRICE, COL_RATING, COL_VOTES, COL_QUOTE) = range(len(COLUMN_HEADERS))

# First run of digits inside the "N people rated" text.
VOTE_COUNT_RE = re.compile(r'\d+')


def getHtml(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx status (instead of silently parsing an error page).
    """
    response = requests.get(url, headers=REQUEST_HEADERS, timeout=timeout)
    response.raise_for_status()
    return response.text


def _field_kind(tag):
    """Classify *tag* as 'title', 'info', 'rating', 'quote', or None."""
    classes = tag.get('class') or []
    if isinstance(classes, str):
        classes = classes.split()
    if tag.name == 'div':
        if 'pl2' in classes:
            return 'title'
        if ' '.join(classes) == 'star clearfix':
            return 'rating'
    elif tag.name == 'p':
        if 'pl' in classes:
            return 'info'
        if 'quote' in classes:
            return 'quote'
    return None


def _group_by_book(soup):
    """Group the per-book tags of one page into a list of dicts.

    Each ``div.pl2`` starts a new book; the ``p.pl``, ``div.star clearfix``
    and ``p.quote`` tags that follow it in document order are attached to
    that book.  Grouping this way keeps every field on its own book's row
    even when a book has no quote, which separate running counters per
    column cannot guarantee.

    NOTE(review): assumes a book's title div precedes its other fields in
    the page markup -- confirm against the live page.
    """
    books = []
    for tag in soup.find_all(['div', 'p']):
        kind = _field_kind(tag)
        if kind is None:
            continue
        if kind == 'title':
            books.append({'title': tag})
        elif books:
            # Keep only the first tag of each kind for the current book.
            books[-1].setdefault(kind, tag)
    return books


def _parse_info(info_text):
    """Split a '/'-separated publication line into {column index: text}.

    Field mapping:
      * fewer than 4 fields: written in order to the author, translator and
        publisher columns, as many as are present (no IndexError on short
        lines);
      * exactly 5 fields: author, translator, publisher, date, price;
      * otherwise: author, publisher, date, price taken from fields 0-3.
    """
    parts = [part.strip() for part in info_text.split('/')]
    if len(parts) < 4:
        return dict(zip((COL_AUTHOR, COL_TRANSLATOR, COL_PUBLISHER), parts))

    fields = {COL_AUTHOR: parts[0]}
    first = 1
    if len(parts) == 5:
        fields[COL_TRANSLATOR] = parts[1]
        first = 2
    fields[COL_PUBLISHER] = parts[first]
    fields[COL_PUB_DATE] = parts[first + 1]
    fields[COL_PRICE] = parts[first + 2]
    return fields


def _write_book(sheet, row, book):
    """Write every field found for one *book* into spreadsheet *row*."""
    link = book['title'].find('a')
    if link is not None:
        sheet.write(row, COL_TITLE, link.text.strip().replace(' ', ''))

    info = book.get('info')
    if info is not None:
        for col, text in _parse_info(info.text).items():
            sheet.write(row, col, text)

    rating = book.get('rating')
    if rating is not None:
        spans = rating.find_all('span')
        # The second span holds the score, the third the vote-count text.
        if len(spans) > 1:
            sheet.write(row, COL_RATING, spans[1].text)
        if len(spans) > 2:
            votes = VOTE_COUNT_RE.search(spans[2].text)
            if votes:
                # Write the digits as a plain string, not a findall() list.
                sheet.write(row, COL_VOTES, votes.group())

    quote = book.get('quote')
    if quote is not None:
        sheet.write(row, COL_QUOTE, quote.text.strip())


def main():
    """Scrape the Douban book Top 250 list into '豆瓣图书Top250.xls'."""
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('豆瓣图书Top250', cell_overwrite_ok=True)
    for col, header in enumerate(COLUMN_HEADERS):
        sheet.write(0, col, header)

    row = 1  # row 0 is the header
    # The list is paginated 25 books per page via the ``start`` parameter.
    for start in range(0, 250, 25):
        url = 'https://book.douban.com/top250?start={0}'.format(start)
        soup = BeautifulSoup(getHtml(url), 'html.parser')
        for book in _group_by_book(soup):
            _write_book(sheet, row, book)
            row += 1

    workbook.save('豆瓣图书Top250.xls')


if __name__ == '__main__':
    main()