Skip to content

Commit a411c86

Browse files
jdmbrson
authored and committed
Make clear the differentiation between char pos and byte pos in filemaps. Fix up error printing for files with multi-byte characters.
1 parent 3ce43f3 commit a411c86

File tree

6 files changed

+64
-34
lines changed

6 files changed

+64
-34
lines changed

src/comp/driver/session.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ obj session(@config targ_cfg,
103103
fn unimpl(str msg) -> ! { self.bug("unimplemented " + msg); }
104104
fn get_codemap() -> codemap::codemap { ret parse_sess.cm; }
105105
fn lookup_pos(uint pos) -> codemap::loc {
106-
ret codemap::lookup_pos(parse_sess.cm, pos);
106+
ret codemap::lookup_char_pos(parse_sess.cm, pos);
107107
}
108108
fn get_parse_sess() -> parse_sess { ret parse_sess; }
109109
fn next_node_id() -> ast::node_id {

src/comp/syntax/codemap.rs

Lines changed: 39 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -9,50 +9,69 @@ import std::option::none;
99

1010
type filename = str;
1111

12+
type file_pos = rec(uint ch, uint byte);
13+
1214
/* A codemap is a thing that maps uints to file/line/column positions
1315
* in a crate. This to make it possible to represent the positions
1416
* with single-word things, rather than passing records all over the
1517
* compiler.
1618
*/
17-
type filemap = @rec(filename name, uint start_pos, mutable uint[] lines);
19+
type filemap = @rec(filename name, file_pos start_pos,
20+
mutable file_pos[] lines);
1821

1922
type codemap = @rec(mutable filemap[] files);
2023

2124
type loc = rec(filename filename, uint line, uint col);
2225

2326
fn new_codemap() -> codemap {
24-
let filemap[] files = ~[];
25-
ret @rec(mutable files=files);
27+
ret @rec(mutable files=~[]);
28+
}
29+
30+
fn new_filemap(filename filename, uint start_pos_ch, uint start_pos_byte)
31+
-> filemap {
32+
ret @rec(name=filename, start_pos=rec(ch=start_pos_ch,
33+
byte=start_pos_byte),
34+
mutable lines=~[rec(ch=start_pos_ch, byte=start_pos_byte)]);
2635
}
2736

28-
fn new_filemap(filename filename, uint start_pos) -> filemap {
29-
ret @rec(name=filename, start_pos=start_pos, mutable lines=[start_pos]);
37+
fn next_line(filemap file, uint chpos, uint byte_pos) {
38+
file.lines += ~[rec(ch=chpos, byte=byte_pos)];
3039
}
3140

32-
fn next_line(filemap file, uint pos) { file.lines += ~[pos]; }
41+
type lookup_fn = fn (file_pos pos) -> uint;
3342

34-
fn lookup_pos(codemap map, uint pos) -> loc {
43+
fn lookup_pos(codemap map, uint pos, lookup_fn lookup) -> loc {
3544
auto a = 0u;
36-
auto b = ivec::len[filemap](map.files);
45+
auto b = ivec::len(map.files);
3746
while (b - a > 1u) {
3847
auto m = (a + b) / 2u;
39-
if (map.files.(m).start_pos > pos) { b = m; } else { a = m; }
48+
if (lookup(map.files.(m).start_pos) > pos) { b = m; } else { a = m; }
4049
}
4150
auto f = map.files.(a);
4251
a = 0u;
43-
b = ivec::len[uint](f.lines);
52+
b = ivec::len(f.lines);
4453
while (b - a > 1u) {
4554
auto m = (a + b) / 2u;
46-
if (f.lines.(m) > pos) { b = m; } else { a = m; }
55+
if (lookup(f.lines.(m)) > pos) { b = m; } else { a = m; }
4756
}
48-
ret rec(filename=f.name, line=a + 1u, col=pos - f.lines.(a));
57+
ret rec(filename=f.name, line=a + 1u, col=pos - lookup(f.lines.(a)));
58+
}
59+
60+
fn lookup_char_pos(codemap map, uint pos) -> loc {
61+
fn lookup(file_pos pos) -> uint { ret pos.ch; }
62+
ret lookup_pos(map, pos, lookup);
63+
}
64+
65+
fn lookup_byte_pos(codemap map, uint pos) -> loc {
66+
fn lookup(file_pos pos) -> uint { ret pos.byte; }
67+
ret lookup_pos(map, pos, lookup);
4968
}
5069

5170
type span = rec(uint lo, uint hi);
5271

5372
fn span_to_str(&span sp, &codemap cm) -> str {
54-
auto lo = lookup_pos(cm, sp.lo);
55-
auto hi = lookup_pos(cm, sp.hi);
73+
auto lo = lookup_char_pos(cm, sp.lo);
74+
auto hi = lookup_char_pos(cm, sp.hi);
5675
ret #fmt("%s:%u:%u:%u:%u", lo.filename, lo.line, lo.col, hi.line, hi.col);
5776
}
5877

@@ -115,8 +134,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color,
115134

116135
// If there's one line at fault we can easily point to the problem
117136
if (ivec::len(lines.lines) == 1u) {
118-
auto lo = codemap::lookup_pos(cm, option::get(sp).lo);
119-
auto lo = lookup_pos(cm, option::get(sp).lo);
137+
auto lo = lookup_char_pos(cm, option::get(sp).lo);
120138
auto digits = 0u;
121139
auto num = lines.lines.(0) / 10u;
122140

@@ -129,7 +147,7 @@ fn emit_diagnostic(&option::t[span] sp, &str msg, &str kind, u8 color,
129147
while (left > 0u) { str::push_char(s, ' '); left -= 1u; }
130148

131149
s += "^";
132-
auto hi = lookup_pos(cm, option::get(sp).hi);
150+
auto hi = lookup_char_pos(cm, option::get(sp).hi);
133151
if (hi.col != lo.col) {
134152
// the ^ already takes up one space
135153
auto width = hi.col - lo.col - 1u;
@@ -158,8 +176,8 @@ fn emit_note(&option::t[span] sp, &str msg, &codemap cm) {
158176
type file_lines = rec(str name, uint[] lines);
159177

160178
fn span_to_lines(span sp, codemap::codemap cm) -> @file_lines {
161-
auto lo = codemap::lookup_pos(cm, sp.lo);
162-
auto hi = codemap::lookup_pos(cm, sp.hi);
179+
auto lo = lookup_char_pos(cm, sp.lo);
180+
auto hi = lookup_char_pos(cm, sp.hi);
163181
auto lines = ~[];
164182
for each (uint i in uint::range(lo.line - 1u, hi.line as uint)) {
165183
lines += ~[i];
@@ -168,10 +186,10 @@ fn span_to_lines(span sp, codemap::codemap cm) -> @file_lines {
168186
}
169187

170188
fn get_line(filemap fm, int line, &str file) -> str {
171-
let uint begin = fm.lines.(line) - fm.start_pos;
189+
let uint begin = fm.lines.(line).byte - fm.start_pos.byte;
172190
let uint end;
173191
if (line as uint < ivec::len(fm.lines) - 1u) {
174-
end = fm.lines.(line + 1) - fm.start_pos;
192+
end = fm.lines.(line + 1).byte - fm.start_pos.byte;
175193
} else {
176194
// If we're not done parsing the file, we're at the limit of what's
177195
// parsed. If we just slice the rest of the string, we'll print out

src/comp/syntax/parse/eval.rs

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ type ctx =
2121
mutable str[] deps,
2222
parser::parse_sess sess,
2323
mutable uint chpos,
24+
mutable uint byte_pos,
2425
ast::crate_cfg cfg);
2526

2627
fn eval_crate_directives(ctx cx, &(@ast::crate_directive)[] cdirs,
@@ -56,7 +57,8 @@ fn eval_crate_directive(ctx cx, @ast::crate_directive cdir, str prefix,
5657
};
5758
if (cx.mode == mode_depend) { cx.deps += ~[full_path]; ret; }
5859
auto p0 =
59-
new_parser_from_file(cx.sess, cx.cfg, full_path, cx.chpos);
60+
new_parser_from_file(cx.sess, cx.cfg, full_path, cx.chpos,
61+
cx.byte_pos);
6062
auto inner_attrs = parse_inner_attrs_and_next(p0);
6163
auto mod_attrs = attrs + inner_attrs._0;
6264
auto first_item_outer_attrs = inner_attrs._1;
@@ -65,8 +67,9 @@ fn eval_crate_directive(ctx cx, @ast::crate_directive cdir, str prefix,
6567
auto i = syntax::parse::parser::mk_item
6668
(p0, cdir.span.lo, cdir.span.hi, id, ast::item_mod(m0),
6769
mod_attrs);
68-
// Thread defids and chpos through the parsers
70+
// Thread defids, chpos and byte_pos through the parsers
6971
cx.chpos = p0.get_chpos();
72+
cx.byte_pos = p0.get_byte_pos();
7073
items += ~[i];
7174
}
7275
case (ast::cdir_dir_mod(?id, ?dir_opt, ?cdirs, ?attrs)) {

src/comp/syntax/parse/lexer.rs

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ type reader =
2424
fn get_mark_str() -> str ;
2525
fn get_interner() -> @interner::interner[str] ;
2626
fn get_chpos() -> uint ;
27+
fn get_byte_pos() -> uint ;
2728
fn get_col() -> uint ;
2829
fn get_filemap() -> codemap::filemap ;
2930
fn err(str) ;
@@ -53,6 +54,7 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
5354
}
5455
fn get_mark_chpos() -> uint { ret mark_chpos; }
5556
fn get_chpos() -> uint { ret chpos; }
57+
fn get_byte_pos() -> uint { ret pos; }
5658
fn curr() -> char { ret ch; }
5759
fn next() -> char {
5860
if (pos < len) {
@@ -70,7 +72,10 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
7072
if (pos < len) {
7173
col += 1u;
7274
chpos += 1u;
73-
if (ch == '\n') { codemap::next_line(fm, chpos); col = 0u; }
75+
if (ch == '\n') {
76+
codemap::next_line(fm, chpos, pos + fm.start_pos.byte);
77+
col = 0u;
78+
}
7479
auto next = str::char_range_at(src, pos);
7580
pos = next._1;
7681
ch = next._0;
@@ -86,7 +91,8 @@ fn new_reader(&codemap::codemap cm, str src, codemap::filemap filemap,
8691
let str[] strs = ~[];
8792
auto rd =
8893
reader(cm, src, str::byte_len(src), 0u, 0u, -1 as char, 0u,
89-
filemap.start_pos, filemap.start_pos, strs, filemap, itr);
94+
filemap.start_pos.ch, filemap.start_pos.ch, strs, filemap,
95+
itr);
9096
rd.init();
9197
ret rd;
9298
}
@@ -737,7 +743,7 @@ fn gather_comments_and_literals(&codemap::codemap cm, str path)
737743
auto srdr = ioivec::file_reader(path);
738744
auto src = str::unsafe_from_bytes_ivec(srdr.read_whole_stream());
739745
auto itr = @interner::mk[str](str::hash, str::eq);
740-
auto rdr = new_reader(cm, src, codemap::new_filemap(path, 0u), itr);
746+
auto rdr = new_reader(cm, src, codemap::new_filemap(path, 0u, 0u), itr);
741747
let cmnt[] comments = ~[];
742748
let lit[] literals = ~[];
743749
let bool first_read = true;

src/comp/syntax/parse/parser.rs

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -50,17 +50,18 @@ type parser =
5050
fn get_filemap() -> codemap::filemap ;
5151
fn get_bad_expr_words() -> hashmap[str, ()] ;
5252
fn get_chpos() -> uint ;
53+
fn get_byte_pos() -> uint ;
5354
fn get_id() -> ast::node_id ;
5455
fn get_sess() -> parse_sess;
5556
};
5657

5758
fn new_parser_from_file(parse_sess sess, ast::crate_cfg cfg,
58-
str path, uint pos) -> parser {
59+
str path, uint chpos, uint byte_pos) -> parser {
5960
auto ftype = SOURCE_FILE;
6061
if (str::ends_with(path, ".rc")) { ftype = CRATE_FILE; }
6162
auto srdr = ioivec::file_reader(path);
6263
auto src = str::unsafe_from_bytes_ivec(srdr.read_whole_stream());
63-
auto filemap = codemap::new_filemap(path, pos);
64+
auto filemap = codemap::new_filemap(path, chpos, byte_pos);
6465
sess.cm.files += ~[filemap];
6566
auto itr = @interner::mk(str::hash, str::eq);
6667
auto rdr = lexer::new_reader(sess.cm, src, filemap, itr);
@@ -114,6 +115,7 @@ fn new_parser(parse_sess sess, ast::crate_cfg cfg, lexer::reader rdr,
114115
fn get_filemap() -> codemap::filemap { ret rdr.get_filemap(); }
115116
fn get_bad_expr_words() -> hashmap[str, ()] { ret bad_words; }
116117
fn get_chpos() -> uint { ret rdr.get_chpos(); }
118+
fn get_byte_pos() -> uint { ret rdr.get_byte_pos(); }
117119
fn get_id() -> ast::node_id { ret next_node_id(sess); }
118120
fn get_sess() -> parse_sess { ret sess; }
119121
}
@@ -2378,15 +2380,15 @@ fn parse_native_view(&parser p) -> (@ast::view_item)[] {
23782380

23792381
fn parse_crate_from_source_file(&str input, &ast::crate_cfg cfg,
23802382
&parse_sess sess) -> @ast::crate {
2381-
auto p = new_parser_from_file(sess, cfg, input, 0u);
2383+
auto p = new_parser_from_file(sess, cfg, input, 0u, 0u);
23822384
ret parse_crate_mod(p, cfg, sess);
23832385
}
23842386

23852387
fn parse_crate_from_source_str(&str name, &str source, &ast::crate_cfg cfg,
23862388
&codemap::codemap cm) -> @ast::crate {
23872389
auto sess = @rec(cm=cm, mutable next_id=0);
23882390
auto ftype = SOURCE_FILE;
2389-
auto filemap = codemap::new_filemap(name, 0u);
2391+
auto filemap = codemap::new_filemap(name, 0u, 0u);
23902392
sess.cm.files += ~[filemap];
23912393
auto itr = @interner::mk(str::hash, str::eq);
23922394
auto rdr = lexer::new_reader(sess.cm, source, filemap, itr);
@@ -2504,7 +2506,7 @@ fn parse_crate_directives(&parser p, token::token term,
25042506

25052507
fn parse_crate_from_crate_file(&str input, &ast::crate_cfg cfg,
25062508
&parse_sess sess) -> @ast::crate {
2507-
auto p = new_parser_from_file(sess, cfg, input, 0u);
2509+
auto p = new_parser_from_file(sess, cfg, input, 0u, 0u);
25082510
auto lo = p.get_lo_pos();
25092511
auto prefix = std::fs::dirname(p.get_filemap().name);
25102512
auto leading_attrs = parse_inner_attrs_and_next(p);
@@ -2517,6 +2519,7 @@ fn parse_crate_from_crate_file(&str input, &ast::crate_cfg cfg,
25172519
mutable deps=deps,
25182520
sess=sess,
25192521
mutable chpos=p.get_chpos(),
2522+
mutable byte_pos=p.get_byte_pos(),
25202523
cfg = p.get_cfg());
25212524
auto m = eval::eval_crate_directives_to_mod(cx, cdirs, prefix);
25222525
auto hi = p.get_hi_pos();

src/comp/syntax/print/pprust.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1345,8 +1345,8 @@ fn maybe_print_trailing_comment(&ps s, codemap::span span,
13451345
alt (next_comment(s)) {
13461346
case (some(?cmnt)) {
13471347
if (cmnt.style != lexer::trailing) { ret; }
1348-
auto span_line = codemap::lookup_pos(cm, span.hi);
1349-
auto comment_line = codemap::lookup_pos(cm, cmnt.pos);
1348+
auto span_line = codemap::lookup_char_pos(cm, span.hi);
1349+
auto comment_line = codemap::lookup_char_pos(cm, cmnt.pos);
13501350
auto next = cmnt.pos + 1u;
13511351
alt (next_pos) { case (none) { } case (some(?p)) { next = p; } }
13521352
if (span.hi < cmnt.pos && cmnt.pos < next &&

0 commit comments

Comments
 (0)