#!/home/reeses/bin/ruby
# Requires ruby-htmltools
# (http://www.bike-nomad.com/ruby/ruby-htmltools-1.01.tar.gz)
# and all that implies.
# (c) Art Taylor, 2002, etc. etc.  There's so much new here that the
# copyright claim is so very, very necessary. ;>

require 'html/tree'
require 'net/http'

# Additions to the tree-node mixin: ancestor lookup and a simplified HTML
# serializer. The methods below rely on the including object responding to
# `parent`, `data?`, `tag`, `attributes` and `children`.
# NOTE(review): presumably these come from ruby-htmltools' own TreeElement,
# which this reopens -- confirm against the installed library version.
module TreeElement
  # Returns the node +n+ levels above this one (1 = parent, 2 = grandparent).
  #
  # @param n [Integer] number of levels to climb; must be at least 1
  # @return [Object, nil] the ancestor, or nil when the parent chain ends
  #   before +n+ levels have been climbed
  # @raise [ArgumentError] if +n+ is less than 1
  def ancestor(n)
    raise ArgumentError, "n must be >= 1 (got #{n.inspect})" if n < 1

    node = self
    n.times do
      node = node.parent
      # Ran off the top of the tree: report "no such ancestor" instead of
      # crashing with NoMethodError on nil.
      return nil if node.nil?
    end
    node
  end

  # Serializes this node and its descendants to an HTML fragment.
  #
  # Data nodes are emitted via their string form. For element nodes:
  # * attributes whose name contains "color" (case-insensitive) and that
  #   have a value are dropped;
  # * the "width" attribute of a "table" tag is forced to "100%";
  # * "src" values matching /^.images/i get the compgeeks.com base URL
  #   inserted after the leading double quote.
  #
  # The tree itself is not modified.
  #
  # @return [String] the HTML text
  def to_html
    return "#{self}\n" if data?

    ret = "<#{tag}"
    attributes.each do |attr, v|
      name = attr.to_s
      if v.nil?
        # Attribute without a value: emit the bare name.
        ret << " #{name}"
        next
      end

      s = v.to_s
      if name == "src" && s =~ /^.images/i
        # Non-mutating gsub: v.to_s may be the very string stored in the
        # tree, and rendering must not rewrite the parsed document.
        s = s.gsub(/^"/, "\"http://www.compgeeks.com/")
      end
      s = "\"100%\"" if tag == "table" && name == "width"
      ret << " #{name}=#{s}" unless name =~ /color/i
    end
    ret << ">\n"
    children.each { |child| ret << child.to_html }
    # NOTE(review): no closing tag is emitted for the element -- confirm
    # whether that is intentional before relying on the output being
    # well-formed.
    ret << "\n"
    ret
  end
end

# Fetches +path+ from +host+ over HTTP and feeds the response body to an
# HTMLTreeParser as it arrives.
#
# @param host [String] host name to connect to
# @param path [String] request path
# @return [HTMLTreeParser] the parser after the body has been fed to it
def parseUrl(host, path)
  parser = HTMLTreeParser.new
  Net::HTTP.new(host).get(path) { |str| parser.feed(str) }
  parser
end

# Base class for a "special offer" scraper. Subclasses set @host, @path,
# @regexp (pattern identifying the node of interest) and @depth (how many
# ancestor levels above a matching node to print).
class Special
  # Downloads the page and prints, as HTML, the @depth-th ancestor of every
  # tree node whose string form matches @regexp.
  def getSpecial
    parser = parseUrl(@host, @path)
    parser.tree.each do |e|
      next unless e.to_s =~ @regexp

      container = e.ancestor(@depth)
      # A match sitting fewer than @depth levels below the root has no such
      # ancestor; skip it rather than abort the whole run.
      print container.to_html if container
    end
  end
end

# Scraper settings for the www.compgeeks.com front page.
class CompGeeksSpecial < Special
  def initialize
    @regexp = /SPECIAL/
    @depth = 9
    @host = "www.compgeeks.com"
    @path = "/"
  end
end

# Scraper settings for www.buy.com/default.asp.
class BuyDotComSpecial < Special
  def initialize
    @regexp = /mistake/
    @depth = 7
    @host = "www.buy.com"
    @path = "/default.asp"
  end
end

CompGeeksSpecial.new.getSpecial
BuyDotComSpecial.new.getSpecial