hadoop-demo: updated and tested SR; moved to software/unstable
[slapos.git] / software / unstable / hadoop-demo / gutenberg / mapper.py
#!/usr/bin/env python
# http://www.michael-noll.com/tutorials/writing-an-hadoop-mapreduce-program-in-python/
"""Word-count mapper for the Hadoop demo.

Reads text lines from standard input and writes one tab-delimited
``<word>\t1`` record per whitespace-separated word to standard output.
That output is the input for the Reduce step, i.e. for reducer.py.
"""

import sys


def map_lines(lines):
    """Yield a ``(word, 1)`` pair for every word found in *lines*.

    *lines* is any iterable of strings.  Words are the whitespace-separated
    tokens of each line, emitted in input order; blank lines yield nothing.
    """
    for line in lines:
        # split() without arguments already ignores leading/trailing
        # whitespace and never returns empty strings, so no strip() is needed.
        for word in line.split():
            # the trivial word count is 1
            yield word, 1


def main(stdin=None, stdout=None):
    """Run the mapper, reading from *stdin* and writing to *stdout*.

    Both streams default to the process' standard streams; they can be
    replaced by any file-like objects (iterable of lines / ``write()``).
    """
    # Resolve the defaults at call time so a redirected sys.stdin/sys.stdout
    # is honoured.
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    for word, count in map_lines(stdin):
        # tab-delimited record, one per line; an explicit write() works on
        # both Python 2 and Python 3 (unlike the print statement).
        stdout.write('%s\t%s\n' % (word, count))


if __name__ == '__main__':
    main()