# http://www.michael-noll.com/tutorials/writing-an-hadoop-mapreduce-program-in-python/
import sys
from itertools import groupby
from operator import itemgetter
def _parse_records(lines):
    """Yield ``(word, count)`` pairs parsed from ``word<TAB>count`` lines.

    Lines that do not contain a tab, or whose count is not an integer,
    are silently discarded.
    """
    for line in lines:
        # remove leading and trailing whitespace
        line = line.strip()
        # parse the input we got from mapper.py; only the first tab splits,
        # so any further tabs stay inside the count field
        word, sep, count = line.partition('\t')
        if not sep:
            # no tab at all (e.g. a blank line): not a record, discard it
            # instead of failing on the unpacking
            continue
        # convert count (currently a string) to int
        try:
            count = int(count)
        except ValueError:
            # count was not a number, so silently
            # ignore/discard this line
            continue
        yield word, count


def reduce_counts(lines):
    """Sum the counts of each run of adjacent identical words.

    Args:
        lines: iterable of ``word<TAB>count`` strings.

    Yields:
        ``(word, total)`` tuples, one per run of consecutive equal words,
        in input order. Nothing is yielded for empty input.
    """
    # groupby only merges *adjacent* equal keys; this only works because
    # Hadoop sorts map output by key (here: word) before it is passed to
    # the reducer
    records = _parse_records(lines)
    for current_word, group in groupby(records, itemgetter(0)):
        current_count = sum(count for _, count in group)
        yield current_word, current_count


def main():
    """Read mapper output from STDIN and write word totals to STDOUT."""
    # input comes from STDIN
    for current_word, current_count in reduce_counts(sys.stdin):
        # write result to STDOUT; the parenthesised single-argument form
        # prints the same text under Python 2 and Python 3
        print('%s\t%s' % (current_word, current_count))


if __name__ == '__main__':
    main()