빅데이터공부하기 61. map.py / reduce.py

행복한짱짱이 2015. 6. 24. 17:16

2015. 6. 24. 17:16

빅데이터공부하기 61

map.py

#!/usr/bin/env python
"""Word-count mapper for a MapReduce-style stdin/stdout pipeline.

Reads text from standard input and writes one ``word<TAB>1`` record per
whitespace-separated word to standard output.
"""

import sys


def map_words(lines):
    """Yield a ``word<TAB>1`` record for every word found in *lines*.

    Args:
        lines: Any iterable of text lines (for example ``sys.stdin``).

    Yields:
        One string per word, formatted as the word, an ASCII tab, and the
        literal count ``1``. Blank or whitespace-only lines yield nothing.
    """
    for line in lines:
        # --- remove leading and trailing whitespace ---
        line = line.strip()
        # --- split the line into words ---
        # split() with no argument splits on any run of whitespace.
        for word in line.split():
            # --- output tuples [word, 1] in tab-delimited format ---
            # The separator must be a real tab escape ('\t'); the original
            # paste used a full-width backslash, which emitted no tab at all.
            yield '%s\t%s' % (word, "1")


def main():
    """Map every line from standard input and print the resulting records."""
    for record in map_words(sys.stdin):
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(record)


if __name__ == '__main__':
    main()

reduce.py

#!/usr/bin/env python
"""Word-count reducer for a MapReduce-style stdin/stdout pipeline.

Reads ``word<TAB>count`` records from standard input, sums the counts per
word, and writes one ``word<TAB>total`` record per distinct word to standard
output.
"""

import sys


def count_words(lines):
    """Sum the counts of every word found in tab-delimited *lines*.

    Args:
        lines: Any iterable of text lines, each expected to look like
            ``word<TAB>count``.

    Returns:
        A dict mapping each word to the integer sum of its counts. Lines
        with no tab separator or with a non-integer count are skipped.
    """
    word2count = {}
    for line in lines:
        line = line.strip()
        # Split on the first real tab only. partition() never raises, so a
        # line without a separator (e.g. a blank line) is detected by the
        # empty `sep` instead of crashing on tuple unpacking.
        word, sep, count = line.partition('\t')
        if not sep:
            continue
        try:
            count = int(count)
        except ValueError:
            # Non-numeric count: ignore the record.
            continue
        # get() covers the first occurrence of a word without a bare except.
        word2count[word] = word2count.get(word, 0) + count
    return word2count


def main():
    """Reduce the records on standard input and print the per-word totals."""
    word2count = count_words(sys.stdin)
    for word in word2count:
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print('%s\t%s' % (word, word2count[word]))


if __name__ == '__main__':
    main()

it개발자스터디공간