Skip to content

Commit 2745a5e

Browse files
committed
Handle line-comments and a few more symbols in rustc lexer.
1 parent 6e3a77c commit 2745a5e

File tree

1 file changed

+81
-4
lines changed

1 file changed

+81
-4
lines changed

src/comp/fe/lexer.rs

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,36 @@ fn is_whitespace(char c) -> bool {
2727
ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
2828
}
2929

30+
fn consume_any_whitespace(stdio_reader rdr, char c) -> char {
31+
auto c1 = c;
32+
while (is_whitespace(c1)) {
33+
c1 = rdr.getc() as char;
34+
}
35+
be consume_any_line_comment(rdr, c1);
36+
}
37+
38+
fn consume_any_line_comment(stdio_reader rdr, char c) -> char {
39+
auto c1 = c;
40+
if (c1 == '/') {
41+
auto c2 = rdr.getc() as char;
42+
if (c2 == '/') {
43+
while (c1 != '\n') {
44+
c1 = rdr.getc() as char;
45+
}
46+
// Restart whitespace munch.
47+
be consume_any_whitespace(rdr, c1);
48+
}
49+
}
50+
ret c;
51+
}
52+
3053
fn next_token(stdio_reader rdr) -> token.token {
3154
auto eof = (-1) as char;
3255
auto c = rdr.getc() as char;
3356
auto accum_str = "";
3457
auto accum_int = 0;
3558

36-
while (is_whitespace(c) && c != eof) {
37-
c = rdr.getc() as char;
38-
}
59+
c = consume_any_whitespace(rdr, c);
3960

4061
if (c == eof) { ret token.EOF(); }
4162

@@ -61,8 +82,19 @@ fn next_token(stdio_reader rdr) -> token.token {
6182
}
6283
}
6384

64-
// One-byte structural symbols.
85+
86+
fn op_or_opeq(stdio_reader rdr, char c2,
87+
token.op op) -> token.token {
88+
if (c2 == '=') {
89+
ret token.OPEQ(op);
90+
} else {
91+
rdr.ungetc(c2 as int);
92+
ret token.OP(op);
93+
}
94+
}
95+
6596
alt (c) {
97+
// One-byte tokens.
6698
case (';') { ret token.SEMI(); }
6799
case (',') { ret token.COMMA(); }
68100
case ('.') { ret token.DOT(); }
@@ -74,6 +106,8 @@ fn next_token(stdio_reader rdr) -> token.token {
74106
case (']') { ret token.RBRACKET(); }
75107
case ('@') { ret token.AT(); }
76108
case ('#') { ret token.POUND(); }
109+
110+
// Multi-byte tokens.
77111
case ('=') {
78112
auto c2 = rdr.getc() as char;
79113
if (c2 == '=') {
@@ -83,6 +117,49 @@ fn next_token(stdio_reader rdr) -> token.token {
83117
ret token.OP(token.EQ());
84118
}
85119
}
120+
121+
case ('-') {
122+
auto c2 = rdr.getc() as char;
123+
if (c2 == '>') {
124+
ret token.RARROW();
125+
} else {
126+
ret op_or_opeq(rdr, c2, token.MINUS());
127+
}
128+
}
129+
130+
case ('&') {
131+
auto c2 = rdr.getc() as char;
132+
if (c2 == '&') {
133+
ret token.OP(token.ANDAND());
134+
} else {
135+
ret op_or_opeq(rdr, c2, token.AND());
136+
}
137+
}
138+
139+
case ('+') {
140+
ret op_or_opeq(rdr, rdr.getc() as char, token.PLUS());
141+
}
142+
143+
case ('*') {
144+
ret op_or_opeq(rdr, rdr.getc() as char, token.STAR());
145+
}
146+
147+
case ('/') {
    // '/' is division, not multiplication; the original arm was a
    // copy of the '*' arm and produced token.STAR().
    // NOTE(review): assumes token.op declares a SLASH() variant -- confirm.
    ret op_or_opeq(rdr, rdr.getc() as char, token.SLASH());
}
150+
151+
case ('!') {
152+
ret op_or_opeq(rdr, rdr.getc() as char, token.NOT());
153+
}
154+
155+
case ('^') {
156+
ret op_or_opeq(rdr, rdr.getc() as char, token.CARET());
157+
}
158+
159+
case ('%') {
160+
ret op_or_opeq(rdr, rdr.getc() as char, token.PERCENT());
161+
}
162+
86163
}
87164

88165
log "lexer stopping at ";

0 commit comments

Comments
 (0)