Skip to content

Commit 53cc673

Browse files
committed
Re-add a read_byte() method to buf_reader objs
read_byte() was previously removed because of the repetition, but doing single-byte reads in terms of read_bytes (which allocates a vec) is needlessly slow. This change speeds up parsing by 22%. (Eventually, we won't be able to escape handling the buffering in the stdlib itself.)
1 parent bb56253 commit 53cc673

File tree

1 file changed

+18
-20
lines changed

1 file changed

+18
-20
lines changed

src/lib/io.rs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ tag seek_style {seek_set; seek_end; seek_cur;}
1515
type buf_reader =
1616
state obj {
1717
impure fn read(uint len) -> vec[u8];
18+
impure fn read_byte() -> int;
1819
impure fn unread_byte(int byte);
1920
impure fn eof() -> bool;
2021

@@ -60,6 +61,9 @@ state obj FILE_buf_reader(os.libc.FILE f, bool must_close) {
6061
_vec.len_set[u8](buf, read);
6162
ret buf;
6263
}
64+
impure fn read_byte() -> int {
65+
ret os.libc.fgetc(f);
66+
}
6367
impure fn unread_byte(int byte) {
6468
os.libc.ungetc(byte, f);
6569
}
@@ -77,25 +81,13 @@ state obj FILE_buf_reader(os.libc.FILE f, bool must_close) {
7781
}
7882
}
7983

80-
// FIXME: When we have a "self" keyword, move this into read_byte(). This is
81-
// only here so that multiple method implementations below can use it.
82-
//
83-
// FIXME: Return value should be option[u8], not int.
84-
impure fn read_byte_from_buf_reader(buf_reader rdr) -> int {
85-
auto buf = rdr.read(1u);
86-
if (_vec.len[u8](buf) == 0u) {
87-
ret -1;
88-
}
89-
ret buf.(0) as int;
90-
}
91-
9284
// FIXME: Convert this into pseudomethods on buf_reader.
9385
state obj new_reader(buf_reader rdr) {
9486
impure fn get_buf_reader() -> buf_reader {
9587
ret rdr;
9688
}
9789
impure fn read_byte() -> int {
98-
ret read_byte_from_buf_reader(rdr);
90+
ret rdr.read_byte();
9991
}
10092
impure fn unread_byte(int byte) {
10193
ret rdr.unread_byte(byte);
@@ -104,7 +96,7 @@ state obj new_reader(buf_reader rdr) {
10496
ret rdr.read(len);
10597
}
10698
impure fn read_char() -> char {
107-
auto c0 = read_byte_from_buf_reader(rdr);
99+
auto c0 = rdr.read_byte();
108100
if (c0 == -1) {ret -1 as char;} // FIXME will this stay valid?
109101
auto b0 = c0 as u8;
110102
auto w = _str.utf8_char_width(b0);
@@ -113,7 +105,7 @@ state obj new_reader(buf_reader rdr) {
113105
auto val = 0u;
114106
while (w > 1u) {
115107
w -= 1u;
116-
auto next = read_byte_from_buf_reader(rdr);
108+
auto next = rdr.read_byte();
117109
check(next > -1);
118110
check(next & 0xc0 == 0x80);
119111
val <<= 6u;
@@ -131,7 +123,7 @@ state obj new_reader(buf_reader rdr) {
131123
// No break yet in rustc
132124
auto go_on = true;
133125
while (go_on) {
134-
auto ch = read_byte_from_buf_reader(rdr);
126+
auto ch = rdr.read_byte();
135127
if (ch == -1 || ch == 10) {go_on = false;}
136128
else {_vec.push[u8](buf, ch as u8);}
137129
}
@@ -141,7 +133,7 @@ state obj new_reader(buf_reader rdr) {
141133
let vec[u8] buf = vec();
142134
auto go_on = true;
143135
while (go_on) {
144-
auto ch = read_byte_from_buf_reader(rdr);
136+
auto ch = rdr.read_byte();
145137
if (ch < 1) {go_on = false;}
146138
else {_vec.push[u8](buf, ch as u8);}
147139
}
@@ -152,7 +144,7 @@ state obj new_reader(buf_reader rdr) {
152144
auto val = 0u;
153145
auto pos = 0u;
154146
while (size > 0u) {
155-
val += (read_byte_from_buf_reader(rdr) as uint) << pos;
147+
val += (rdr.read_byte() as uint) << pos;
156148
pos += 8u;
157149
size -= 1u;
158150
}
@@ -162,7 +154,7 @@ state obj new_reader(buf_reader rdr) {
162154
auto val = 0u;
163155
auto pos = 0u;
164156
while (size > 0u) {
165-
val += (read_byte_from_buf_reader(rdr) as uint) << pos;
157+
val += (rdr.read_byte() as uint) << pos;
166158
pos += 8u;
167159
size -= 1u;
168160
}
@@ -174,7 +166,7 @@ state obj new_reader(buf_reader rdr) {
174166
auto sz = size; // FIXME: trans.ml bug workaround
175167
while (sz > 0u) {
176168
sz -= 1u;
177-
val += (read_byte_from_buf_reader(rdr) as uint) << (sz * 8u);
169+
val += (rdr.read_byte() as uint) << (sz * 8u);
178170
}
179171
ret val;
180172
}
@@ -221,6 +213,12 @@ state obj byte_buf_reader(byte_buf bbuf) {
221213
bbuf.pos += to_read;
222214
ret range;
223215
}
216+
impure fn read_byte() -> int {
217+
if (bbuf.pos == _vec.len[u8](bbuf.buf)) {ret -1;}
218+
auto b = bbuf.buf.(bbuf.pos);
219+
bbuf.pos += 1u;
220+
ret b as int;
221+
}
224222

225223
impure fn unread_byte(int byte) {
226224
log "TODO: unread_byte";

0 commit comments

Comments
 (0)