# SAX content handler that collects the text of 'title' elements and prints it.
# NOTE(review): this extract is whitespace-mangled (statements are split across
# lines, stale line numbers are fused into the text) and some original lines
# are not shown; the comments below describe only what is visible.
8 class WikipediaTitleHandler(xml
.sax
.ContentHandler
):
# SAX callback for an opening tag. Its body is not visible in this extract.
# presumably it stores `name` in self.tag and resets self.chars, since the
# other methods read both attributes -- TODO confirm against the full file.
9 def startElement(self
, name
, attrs
):
# SAX callback for character data.
13 def characters(self
, content
):
# Only text seen while the current tag is 'title' is kept.
14 if self
.tag
== 'title':
# Accumulate the chunk; endElement joins all chunks into one string.
15 self
.chars
.append(content
)
# SAX callback for a closing tag.
# NOTE(review): the check below uses self.tag, not the `name` argument.
17 def endElement(self
, name
):
18 if self
.tag
== 'title':
# Rebuild the complete title from the chunks gathered by characters().
19 title
= ''.join(self
.chars
)
# Prefix checks for 'Talk:', 'User talk:', 'Wikipedia:', 'Wikipedia talk:'
# and 'User:'. The statement under each check is not visible in this extract;
# presumably such titles are skipped rather than printed -- TODO confirm.
20 if title
.startswith('Talk:'):
22 if title
.startswith('User talk:'):
24 if title
.startswith('Wikipedia:'):
26 if title
.startswith('Wikipedia talk:'):
28 if title
.startswith('User:'):
# Emit the title encoded as UTF-8 (Python 2 print statement).
30 print title
.encode('utf8')
# Set up a SAX parser whose content handler is a WikipediaTitleHandler.
# `input` is not used by any visible line; the call that feeds it to the
# parser is presumably among the lines missing from this extract -- TODO confirm.
# NOTE(review): the parameter name `input` shadows the builtin of the same name.
36 def process_xml(input):
# Create a parser with xml.sax.make_parser().
38 parser
= xml
.sax
.make_parser()
# Install a new WikipediaTitleHandler instance as the content handler.
39 parser
.setContentHandler(WikipediaTitleHandler())
# Script entry point: runs only when the module is executed directly.
43 if __name__
== '__main__':
# Open '/dev/fd/0' (file descriptor 0, i.e. standard input on Unix-like
# systems) through bz2.BZ2File. The statement that consumes `input` is not
# visible in this extract; presumably it is passed to process_xml -- TODO confirm.
44 input = bz2
.BZ2File('/dev/fd/0')