作者:tanglei52017 | 来源:互联网 | 2017-05-14 02:24
代码片段,代码分享,PHP代码分享,Java代码分享,Ruby代码分享,Python代码分享,HTML代码分享,CSS代码分享,SQL代码分享,JavaScript代码分享
# encoding: utf-8
require 'thread'
require 'nokogiri'
require 'open-uri'
require 'rss/maker'
# Shared queue of result rows; extract_readme_header pushes
# [no, name, readme_url, last_modified, text] arrays onto it.
$result=Queue.new
# Fetches a project's frameset page, follows its second <frame> to the README
# page, and pushes a summary row onto the global $result queue.
#
# The pushed row is [no, name, readme_url, last_modified, text], where text is
# the content of the first five matching paragraphs joined with spaces.
# Projects whose text is empty, or matches /(rails)|(activ_)/i, are skipped.
#
# Any StandardError (network failure, unexpected markup, ...) is reported on
# stdout and swallowed, so one failing project does not abort the whole scan.
#
# @param no [Integer] index of the project in the scanned list (sort key later on)
# @param name [String] project name
# @param url [String] absolute URL of the project's frameset page
# @return [void] the return value is not meaningful; results go to $result
def extract_readme_header(no, name, url)
  # URI.open instead of Kernel#open: since Ruby 3.0 Kernel#open no longer
  # opens URLs. The block form also closes the handle, which the original
  # never did for this first request.
  frame = URI.open(url) { |page| Nokogiri::HTML(page) }
  return unless frame

  # The README lives in the second frame; bail out quietly when the page
  # does not have one (previously this surfaced as a rescued NoMethodError).
  readme_frame = frame.css('frame')[1]
  src = readme_frame && readme_frame['src']
  return unless src

  # Frame sources are site-relative, so prefix them with the site root.
  readme = $url + src

  URI.open(readme) do |f|
    doc = Nokogiri::HTML(f.read)
    # NOTE(review): this selector matches <p> elements by id; it may have been
    # `div#content div#filecontents p` before the code was republished --
    # confirm against the live markup.
    text = doc.css("p#content p#filecontents p")[0..4].map(&:content).join(" ").strip
    return if text.empty?

    if text !~ /(rails)|(activ_)/i
      puts "========= #{no} #{name} : #{text[0..50]}"
      date = f.last_modified
      $result << [no, name, readme, date, text]
    end
  end
rescue StandardError => e
  # Best-effort scraping: report the failure and move on.
  puts e.message
end
# Builds an RSS 2.0 feed with one entry per collected project row.
#
# @param items [Enumerable<Array>] rows shaped [no, name, url, date, descr];
#   `no` is ignored here, the other fields become the entry's title, link,
#   date and description respectively
# @return [Object] whatever RSS::Maker.make returns for the "2.0" maker
def make_rss(items)
  RSS::Maker.make("2.0") do |m|
    # Fixed typo in the feed title ("GtitHub" -> "GitHub").
    m.channel.title = "GitHub recently updated projects"
    m.channel.link = "http://localhost"
    m.channel.description = "GitHub recently updated projects"
    # Ask the maker to sort the entries itself instead of keeping input order.
    m.items.do_sort = true

    items.each do |_no, name, url, date, descr|
      entry = m.items.new_item
      entry.title = name
      entry.link = url
      entry.description = descr
      entry.date = date
    end
  end
end
############################## M A I N ########################
############# Scan list of recent projects
# Worker threads, one per project found on the index page.
lth = []
$url = "http://rdoc.info"
puts "get url #{$url}..."
# URI.open instead of Kernel#open (which no longer opens URLs on Ruby 3.0+);
# the block form closes the connection once the page is parsed.
doc = URI.open($url) { |page| Nokogiri::HTML(page) }
doc.css('ul.libraries')[1].css('li').each_with_index do |li, i|
  aname = li.css('a').first
  name = aname.content
  purl = $url + aname['href']
  # NOTE(review): everything from here to the sort below was lost in the
  # published copy (the text between `<<` and `>` was stripped as an HTML tag,
  # leaving `lth <0`). The thread-per-project spawn, join and queue drain are a
  # reconstruction from the surrounding code -- confirm against the original.
  lth << Thread.new(i, name, purl) { |no, n, u| extract_readme_header(no, n, u) }
end
# Wait for every fetch to finish before reading the shared queue.
lth.each(&:join)
result = []
result << $result.pop until $result.empty?
# Restore index-page order; threads finish in arbitrary order.
result.sort! { |a, b| a[0] <=> b[0] }
################ format results in rss
File.open("RubyFeeds.rss", "w") do |file|
  file.write make_rss(result)
end
以上就是使用Ruby和Nokogiri模拟爬虫导出RSS种子的实例详解的详细内容,更多请关注 第一PHP社区 其它相关文章!