Skip to content

Commit a84310a

Browse files
author
Eric Holk
committed
Started working on MapReduce.
Currently it's only sequential, but it can do word frequency counting. In an ideal world it would all be polymorphic, but that pushes the limits of our type system right now. We can generalize it later.
1 parent d151e18 commit a84310a

File tree

1 file changed

+220
-0
lines changed

1 file changed

+220
-0
lines changed
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
/**
2+
A word-frequency counting program (currently sequential; intended to become parallel).
3+
4+
This is meant primarily to demonstrate Rust's MapReduce framework.
5+
6+
It takes a list of files on the command line and outputs a list of
7+
words along with how many times each word is used.
8+
9+
*/
10+
11+
use std;
12+
13+
import std::io;
14+
import option = std::option::t;
15+
import std::option::some;
16+
import std::option::none;
17+
import std::str;
18+
import std::vec;
19+
import std::map;
20+
21+
// A small, sequential map/reduce driver whose keys and values are all
// strings. Inputs are mapped one at a time, the emitted (key, value) pairs
// are grouped by key in a hash map, and then each group is handed to the
// reducer.
mod map_reduce {
22+
export putter;
23+
export getter;
24+
export mapper;
25+
export reducer;
26+
export map_reduce;
27+
28+
// Callback handed to a mapper; invoked as putter(key, value) to record one
// intermediate pair.
type putter = fn(str, str) -> ();
29+
30+
// A map function: receives one input string and the putter to emit with.
type mapper = fn(str, putter);
31+
32+
// Callback handed to a reducer; each call yields some(value) for the next
// value of the current key, or none when there are no more values.
type getter = fn() -> option[str];
33+
34+
// A reduce function: receives a key and a getter over that key's values.
type reducer = fn(str, getter);
35+
36+
37+
// Runs `f` over every element of `inputs`, collecting everything it emits
// into a per-key list, then calls `reduce` once for every key collected.
fn map_reduce (vec[str] inputs,
38+
mapper f,
39+
reducer reduce) {
40+
// Maps each emitted key to the vector of values emitted for it so far.
auto intermediates = map::new_str_hash[vec[str]]();
41+
42+
// Appends `val` to the list stored under `key` in `im`, starting a new
// list when the key has not been seen yet.
fn emit(&map::hashmap[str, vec[str]] im,
43+
str key, str val) {
44+
auto old = [];
45+
// Take the existing list out of the map (if any) so it can be extended
// and reinserted below.
alt(im.remove(key)) {
46+
case (some(?v)) {
47+
old = v;
48+
}
49+
case (none) { }
50+
}
51+
52+
im.insert(key, old + [val]);
53+
}
54+
55+
// Map phase: the first argument of emit is fixed to the shared table, so
// the mapper only sees a two-argument (key, value) putter.
for (str i in inputs) {
56+
f(i, bind emit(intermediates, _, _));
57+
}
58+
59+
// Advances the cursor `i` by one and returns the element of `vals` at the
// position the cursor just left, or none once the cursor has moved past
// the end of `vals`.
// NOTE(review): successive calls only walk the vector if the bound `i` is
// shared between invocations -- confirm how bind treats the &mutable
// argument.
fn get(vec[str] vals, &mutable uint i) -> option[str] {
60+
i += 1u;
61+
if(i <= vec::len(vals)) {
62+
some(vals.(i - 1u))
63+
}
64+
else {
65+
none
66+
}
67+
}
68+
69+
// Reduce phase: kv._0 is the key and kv._1 the values gathered for it;
// every key gets a fresh cursor starting at zero.
for each (@tup(str, vec[str]) kv in intermediates.items()) {
70+
auto i = 0u;
71+
reduce(kv._0, bind get(kv._1, i));
72+
}
73+
}
74+
}
75+
76+
// Program entry point. Expects at least one file name after the program
// name; counts how often each word occurs across those files and writes one
// "word: count" line per distinct word to standard output.
fn main(vec[str] argv) {
77+
// No file names were given: print a usage line (argv.(0) fills the %s
// slot) and fail.
if(vec::len(argv) < 2u) {
78+
auto out = io::stdout();
79+
80+
out.write_line(#fmt("Usage: %s <filename> ...", argv.(0)));
81+
fail;
82+
}
83+
84+
// Mapper: opens `filename` and emits the pair (word, "1") for every word
// that read_word returns, stopping at the first none.
fn map(str filename, map_reduce::putter emit) {
85+
auto f = io::file_reader(filename);
86+
87+
while(true) {
88+
alt(read_word(f)) {
89+
case (some(?w)) {
90+
emit(w, "1");
91+
}
92+
case (none) {
93+
break;
94+
}
95+
}
96+
}
97+
}
98+
99+
// Reducer: counts how many values `get` yields for `word` (the values
// themselves are ignored) and writes the total to standard output.
fn reduce(str word, map_reduce::getter get) {
100+
auto count = 0;
101+
102+
while(true) {
103+
alt(get()) {
104+
case(some(_)) { count += 1 }
105+
case(none) { break }
106+
}
107+
}
108+
109+
auto out = io::stdout();
110+
out.write_line(#fmt("%s: %d", word, count));
111+
}
112+
113+
// Everything after the first element of argv is passed as the input list.
map_reduce::map_reduce(vec::slice(argv, 1u, vec::len(argv)), map, reduce);
114+
}
115+
116+
// Reads the next word from `r`.
//
// A word is a maximal run of characters accepted by is_word_char; every
// other character acts as a separator and is skipped. Returns some(word)
// when a word was read, or none once the reader is exhausted and no word is
// pending.
fn read_word(io::reader r) -> option[str] {
    auto w = "";

    while(!r.eof()) {
        auto c = r.read_char();

        if(is_word_char(c)) {
            w += str::from_char(c);
        }
        else {
            // A separator ends the current word; separators seen before any
            // word character are simply skipped.
            if(w != "") {
                ret some(w);
            }
        }
    }

    // The reader reported end of input while a word was still being
    // accumulated (no separator followed it). Return that word instead of
    // silently dropping it; the next call starts with an empty buffer at
    // eof and therefore returns none.
    if(w != "") {
        ret some(w);
    }
    ret none;
}
133+
134+
fn is_digit(char c) -> bool {
135+
alt(c) {
136+
case ('0') { true }
137+
case ('1') { true }
138+
case ('2') { true }
139+
case ('3') { true }
140+
case ('4') { true }
141+
case ('5') { true }
142+
case ('6') { true }
143+
case ('7') { true }
144+
case ('8') { true }
145+
case ('9') { true }
146+
case (_) { false }
147+
}
148+
}
149+
150+
fn is_alpha_lower (char c) -> bool {
151+
alt(c) {
152+
case ('a') { true }
153+
case ('b') { true }
154+
case ('c') { true }
155+
case ('d') { true }
156+
case ('e') { true }
157+
case ('f') { true }
158+
case ('g') { true }
159+
case ('h') { true }
160+
case ('i') { true }
161+
case ('j') { true }
162+
case ('k') { true }
163+
case ('l') { true }
164+
case ('m') { true }
165+
case ('n') { true }
166+
case ('o') { true }
167+
case ('p') { true }
168+
case ('q') { true }
169+
case ('r') { true }
170+
case ('s') { true }
171+
case ('t') { true }
172+
case ('u') { true }
173+
case ('v') { true }
174+
case ('w') { true }
175+
case ('x') { true }
176+
case ('y') { true }
177+
case ('z') { true }
178+
case (_) { false }
179+
}
180+
}
181+
182+
fn is_alpha_upper (char c) -> bool {
183+
alt(c) {
184+
case ('A') { true }
185+
case ('B') { true }
186+
case ('C') { true }
187+
case ('D') { true }
188+
case ('E') { true }
189+
case ('F') { true }
190+
case ('G') { true }
191+
case ('H') { true }
192+
case ('I') { true }
193+
case ('J') { true }
194+
case ('K') { true }
195+
case ('L') { true }
196+
case ('M') { true }
197+
case ('N') { true }
198+
case ('O') { true }
199+
case ('P') { true }
200+
case ('Q') { true }
201+
case ('R') { true }
202+
case ('S') { true }
203+
case ('T') { true }
204+
case ('U') { true }
205+
case ('V') { true }
206+
case ('W') { true }
207+
case ('X') { true }
208+
case ('Y') { true }
209+
case ('Z') { true }
210+
case (_) { false }
211+
}
212+
}
213+
214+
fn is_alpha(char c) -> bool {
215+
is_alpha_upper(c) || is_alpha_lower(c)
216+
}
217+
218+
fn is_word_char(char c) -> bool {
219+
is_alpha(c) || is_digit(c) || c == '_'
220+
}

0 commit comments

Comments
 (0)