#!/usr/bin/env python
"""Filter duplicate posts out of an XML document.

Reads an XML document from standard input, looks at the first ``<a>`` child
of every ``<h3 class="desc">`` element, and removes the grandparent element
(the "post") of every such link whose ``href`` has already been seen earlier
in the document.  The resulting document is written to standard output.
"""

from HTMLParser import HTMLParser
import sys, formatter
from xml.dom.minidom import parse, parseString
import xml.xpath

# XPath selecting the first <a> child of each <h3 class="desc"> element.
LINK_XPATH = "//h3[@class='desc']/a[1]"


def remove_duplicate_posts(dom):
    """Remove every post whose link href duplicates an earlier one.

    For each node matched by ``LINK_XPATH`` the ``href`` attribute is read.
    The first occurrence of an href is kept; for each later occurrence the
    link's grandparent node is detached from its own parent.

    Args:
        dom: a parsed ``xml.dom.minidom`` document; it is modified in place.

    Returns:
        The number of nodes that were removed from the document.
    """
    seen_links = set()
    removed = 0
    for link in xml.xpath.Evaluate(LINK_XPATH, dom):
        href = link.getAttribute("href")
        if href not in seen_links:
            seen_links.add(href)
            continue

        # The link's parent is the <h3>; the <h3>'s parent is the post.
        post = link.parentNode.parentNode
        # Skip when there is nothing left to detach: either the post was
        # already removed because of another duplicate link inside it, or the
        # "post" is the document node itself (the <h3> is the root element).
        if post is None or post.parentNode is None:
            continue
        post.parentNode.removeChild(post)
        removed += 1
    return removed


def main():
    """Read XML from stdin, drop duplicate posts, write XML to stdout."""
    # Read the whole input at once instead of concatenating line by line.
    dom = parseString(sys.stdin.read())
    remove_duplicate_posts(dom)
    dom.writexml(sys.stdout)


if __name__ == '__main__':
    main()