Skip to content

Commit dca67f9

Browse files
committed
Make the lexer slightly less stateful
1 parent e949aab commit dca67f9

File tree

2 files changed

+25
-29
lines changed

2 files changed

+25
-29
lines changed

src/comp/syntax/parse/lexer.rs

Lines changed: 18 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,7 @@ type reader =
1919
fn next() -> char ;
2020
fn init() ;
2121
fn bump() ;
22-
fn mark() ;
23-
fn get_mark_chpos() -> uint ;
24-
fn get_mark_str() -> str ;
22+
fn get_str_from(uint) -> str ;
2523
fn get_interner() -> @interner::interner[str] ;
2624
fn get_chpos() -> uint ;
2725
fn get_byte_pos() -> uint ;
@@ -38,21 +36,16 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
3836
mutable uint col,
3937
mutable uint pos,
4038
mutable char ch,
41-
mutable uint mark_pos,
42-
mutable uint mark_chpos,
4339
mutable uint chpos,
4440
mutable str[] strs,
4541
codemap::filemap fm,
4642
@interner::interner[str] itr) {
4743
fn is_eof() -> bool { ret ch == -1 as char; }
48-
fn mark() { mark_pos = pos; mark_chpos = chpos; }
49-
fn get_mark_str() -> str {
44+
fn get_str_from(uint start) -> str {
5045
// I'm pretty skeptical about this subtraction. What if there's a
5146
// multi-byte character before the mark?
52-
ret str::slice(src, mark_pos - 1u,
53-
pos - 1u);
47+
ret str::slice(src, start - 1u, pos - 1u);
5448
}
55-
fn get_mark_chpos() -> uint { ret mark_chpos; }
5649
fn get_chpos() -> uint { ret chpos; }
5750
fn get_byte_pos() -> uint { ret pos; }
5851
fn curr() -> char { ret ch; }
@@ -90,9 +83,8 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
9083
}
9184
let str[] strs = ~[];
9285
auto rd =
93-
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, 0u,
94-
filemap.start_pos.ch, filemap.start_pos.ch, strs, filemap,
95-
itr);
86+
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char,
87+
filemap.start_pos.ch, strs, filemap, itr);
9688
rd.init();
9789
ret rd;
9890
}
@@ -346,11 +338,17 @@ fn scan_numeric_escape(&reader rdr, uint n_hex_digits) -> char {
346338
ret accum_int as char;
347339
}
348340

349-
fn next_token(&reader rdr) -> token::token {
350-
auto accum_str = "";
341+
fn next_token(&reader rdr) -> tup(token::token, uint, uint) {
351342
consume_whitespace_and_comments(rdr);
352-
if (rdr.is_eof()) { ret token::EOF; }
353-
rdr.mark();
343+
auto start_chpos = rdr.get_chpos();
344+
auto start_bpos = rdr.get_byte_pos();
345+
auto tok = if rdr.is_eof() { token::EOF }
346+
else { next_token_inner(rdr) };
347+
ret tup(tok, start_chpos, start_bpos);
348+
}
349+
350+
fn next_token_inner(&reader rdr) -> token::token {
351+
auto accum_str = "";
354352
auto c = rdr.curr();
355353
if (is_alpha(c) || c == '_') {
356354
while (is_alnum(c) || c == '_') {
@@ -762,11 +760,10 @@ fn gather_comments_and_literals(&codemap::codemap cm, str path)
762760
break;
763761
}
764762
auto tok = next_token(rdr);
765-
if (is_lit(tok)) {
766-
literals += ~[rec(lit=rdr.get_mark_str(),
767-
pos=rdr.get_mark_chpos())];
763+
if (is_lit(tok._0)) {
764+
literals += ~[rec(lit=rdr.get_str_from(tok._2), pos=tok._1)];
768765
}
769-
log "tok: " + token::to_str(rdr, tok);
766+
log "tok: " + token::to_str(rdr, tok._0);
770767
first_read = false;
771768
}
772769
ret rec(cmnts=comments, lits=literals);

src/comp/syntax/parse/parser.rs

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -90,8 +90,9 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr,
9090
// + ":" + common::istr(lo.line as int);
9191

9292
last_lo = lo;
93-
tok = lexer::next_token(rdr);
94-
lo = rdr.get_mark_chpos();
93+
auto next = lexer::next_token(rdr);
94+
tok = next._0;
95+
lo = next._1;
9596
hi = rdr.get_chpos();
9697
}
9798
fn fatal(str m) -> ! {
@@ -122,11 +123,9 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr,
122123
fn get_sess() -> parse_sess { ret sess; }
123124
}
124125

125-
// Make sure npos points at first actual token:
126-
lexer::consume_whitespace_and_comments(rdr);
127-
auto npos = rdr.get_chpos();
128-
ret stdio_parser(sess, cfg, ftype, lexer::next_token(rdr),
129-
npos, npos, npos, UNRESTRICTED, rdr,
126+
auto tok0 = lexer::next_token(rdr);
127+
ret stdio_parser(sess, cfg, ftype, tok0._0,
128+
tok0._1, tok0._1, tok0._1, UNRESTRICTED, rdr,
130129
prec_table(), bad_expr_word_table());
131130
}
132131

@@ -693,13 +692,13 @@ fn parse_path(&parser p) -> ast::path {
693692
case (token::IDENT(?i, _)) {
694693
hi = p.get_hi_pos();
695694
ids += ~[p.get_str(i)];
695+
hi = p.get_hi_pos();
696696
p.bump();
697697
if (p.peek() == token::MOD_SEP) { p.bump(); } else { break; }
698698
}
699699
case (_) { break; }
700700
}
701701
}
702-
hi = p.get_hi_pos();
703702
ret spanned(lo, hi, rec(global=global, idents=ids, types=~[]));
704703
}
705704

0 commit comments

Comments
 (0)