빅데이터공부하기 61

 

map.py

 

#!/usr/bin/env python

import sys

 

def map_words(lines):
    """Yield one ``word<TAB>1`` record per whitespace-separated word.

    This is the map step of a streaming word count: every word found in
    the input is emitted with a count of 1, in input order.

    Args:
        lines: Iterable of text lines (for example ``sys.stdin``).

    Yields:
        str: The word, a tab character, and the literal ``1``. Lines that
        contain only whitespace produce no records.
    """
    for line in lines:
        # Remove leading and trailing whitespace, then split into words.
        for word in line.strip().split():
            # Output the pair [word, 1] in tab-delimited format.
            yield '%s\t%s' % (word, "1")


def main():
    """Read all lines from stdin and print one mapper record per word."""
    for record in map_words(sys.stdin):
        # Single-argument parenthesised print behaves the same on
        # Python 2 and Python 3.
        print(record)


if __name__ == '__main__':
    main()

 


 

reduce.py

 

#!/usr/bin/env python

import sys

def reduce_counts(lines):
    """Sum the counts of tab-delimited ``word<TAB>count`` records.

    This is the reduce step of a streaming word count.

    Args:
        lines: Iterable of text lines (for example ``sys.stdin``), each
            expected to hold a word, a tab character, and an integer.

    Returns:
        dict: Maps each word to the sum of its counts. Lines without a tab
        separator, or whose count is not an integer, are skipped.
    """
    word2count = {}
    for line in lines:
        line = line.strip()

        # Split on the first tab only. A blank or tab-less line has no
        # separator and is skipped rather than raising on unpacking.
        word, separator, count = line.partition('\t')
        if not separator:
            continue

        try:
            count = int(count)
        except ValueError:
            # Count was not a number: silently ignore this record.
            continue

        word2count[word] = word2count.get(word, 0) + count
    return word2count


def main():
    """Read mapper records from stdin and print the total per word."""
    word2count = reduce_counts(sys.stdin)
    for word in word2count:
        # Single-argument parenthesised print behaves the same on
        # Python 2 and Python 3.
        print('%s\t%s' % (word, word2count[word]))


if __name__ == '__main__':
    main()

 

 

 

 

 

 

+ Recent posts