Ruby Filter

どうせやるなら、Ruby の中でもパイプラインを構築できるようにすればいいのに。

require 'pp'
require 'uri'
require 'open-uri'
require 'rubygems'
require 'scrapi'

# A composable, single-argument processing step.
#
# A Filter wraps a block. Filters are chained with `|`, shell-pipeline style:
# `(a | b).execute(x)` is equivalent to `b.execute(a.execute(x))`.
class Filter
   # Wraps the given block as this filter's processing step.
   #
   # Raises ArgumentError when no block is given, instead of deferring the
   # failure to a NoMethodError on nil at the first call to #execute.
   def initialize(&block)
      raise ArgumentError, 'Filter.new requires a block' unless block

      @proc = block
   end

   # Returns a new Filter that runs this filter first and then passes its
   # result to +filter+ (any object responding to #execute).
   def |(filter)
      upstream = self
      Filter.new { |arg| filter.execute(upstream.execute(arg)) }
   end

   # Calls the wrapped block with +arg+ and returns the block's result.
   def execute(arg)
      @proc.call(arg)
   end
end

# Builds a Filter that converts its input into a URI object via URI.parse.
def parseURI
   Filter.new { |text| URI.parse(text) }
end

# Builds a Filter that fetches the resource at the given URI and returns the
# response body as a String.
#
# The input may be a URI object (e.g. the output of parseURI) or a URL string.
#
# The fetch goes through the URI object's own #open (added by open-uri)
# rather than Kernel#open: Kernel#open stopped dispatching to open-uri in
# Ruby 3.0, and on Rubies that support pipe commands it runs a String
# starting with "|" as a command.
# Only URI classes that open-uri extends (http/https/ftp) respond to #open;
# anything else raises NoMethodError.
def getHTTP
   Filter.new do |uri|
      target = uri.is_a?(URI::Generic) ? uri : URI.parse(uri.to_s)
      target.open('r') { |fp| fp.read }
   end
end

# Builds a Filter that scrapes an HTML string for the page title and the
# href of its feed <link> elements (matched by their title attribute).
#
# The filter returns the scraper's result with the fields title, rss20,
# rss10 and atom; a field is nil when nothing matched it.
def parseHTML
   Filter.new do |html|
      # The scraper is (re)defined on each execution of the filter.
      feed_scraper = Scraper.define do
         # Page title text.
         process 'title',               :title => :text

         # Feed links; both "RSS" and "RSS 1.0" titles feed the rss10 field.
         process 'link[title=RSS 2.0]', :rss20 => '@href'
         process 'link[title=RSS]',     :rss10 => '@href'
         process 'link[title=RSS 1.0]', :rss10 => '@href'
         process 'link[title=Atom]',    :atom  => '@href'

         result :title, :rss20, :rss10, :atom
      end

      feed_scraper.scrape(html, :parser => :html_parser)
   end
end

# Command-line entry point: ruby filter.rb PIPELINE URI
#
# PIPELINE is a Ruby expression that builds a Filter, for example
# 'parseURI | getHTTP | parseHTML'; URI is the initial input fed to it.
if $0 == __FILE__
   # Without both arguments, eval(nil) would fail with an opaque TypeError.
   abort "usage: #{$0} PIPELINE URI" if ARGV.size < 2

   # NOTE(security): the pipeline expression is eval'ed as arbitrary Ruby.
   # That is how the pipeline DSL works, so only run this with trusted
   # command-line input.
   pipeline = eval(ARGV.shift)
   uri = ARGV.shift

   pp pipeline.execute(uri)
end

こう使う。

% ruby filter.rb 'parseURI | getHTTP | parseHTML' 'http://d.hatena.ne.jp/odz/'
#<struct
 title="odz buffer",
 rss20="http://d.hatena.ne.jp/odz/rss2",
 rss10="http://d.hatena.ne.jp/odz/rss",
 atom=nil>