Changeset 1024 in webkit for trunk/JavaScriptCore/kjs/lexer.cpp
- Timestamp: Apr 15, 2002, 4:43:21 PM (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/kjs/lexer.cpp
r798 r1024 19 19 * Boston, MA 02111-1307, USA. 20 20 * 21 * $Id$22 21 */ 23 22 … … 53 52 #include "lexer.lut.h" 54 53 55 extern YYLTYPE yylloc; 54 extern YYLTYPE yylloc; // global bison variable holding token info 56 55 57 56 // a bridge for yacc from the C world to C++ … … 62 61 63 62 Lexer::Lexer() 64 : yylineno( 0),63 : yylineno(1), 65 64 size8(128), size16(128), restrKeyword(false), 66 65 eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0), … … 95 94 void Lexer::setCode(const UChar *c, unsigned int len) 96 95 { 97 yylineno = 0;96 yylineno = 1; 98 97 restrKeyword = false; 99 98 delimited = false; … … 126 125 next3 = (pos + 3 < length) ? code[pos+3].unicode() : 0; 127 126 } 127 } 128 129 // called on each new line 130 void Lexer::nextLine() 131 { 132 yylineno++; 133 #ifndef KJS_PURE_ECMA 134 bol = true; 135 #endif 128 136 } 129 137 … … 167 175 case Start: 168 176 if (isWhiteSpace()) { 169 177 // do nothing 170 178 } else if (current == '/' && next1 == '/') { 171 172 179 shift(1); 180 state = InSingleLineComment; 173 181 } else if (current == '/' && next1 == '*') { 174 175 182 shift(1); 183 state = InMultiLineComment; 176 184 } else if (current == 0) { 177 178 179 180 181 182 183 185 if (!terminator && !delimited) { 186 // automatic semicolon insertion if program incomplete 187 token = ';'; 188 stackToken = 0; 189 setDone(Other); 190 } else 191 setDone(Eof); 184 192 } else if (isLineTerminator()) { 185 yylineno++; 193 nextLine(); 194 terminator = true; 195 if (restrKeyword) { 196 token = ';'; 197 setDone(Other); 198 } 199 } else if (current == '"' || current == '\'') { 200 state = InString; 201 stringType = current; 202 } else if (isIdentLetter(current)) { 203 record16(current); 204 state = InIdentifier; 205 } else if (current == '0') { 206 record8(current); 207 state = InNum0; 208 } else if (isDecimalDigit(current)) { 209 record8(current); 210 state = InNum; 211 } else if (current == '.' 
&& isDecimalDigit(next1)) { 212 record8(current); 213 state = InDecimal; 186 214 #ifndef KJS_PURE_ECMA 187 bol = true; 188 #endif 189 terminator = true; 190 if (restrKeyword) { 191 token = ';'; 192 setDone(Other); 193 } 194 } else if (current == '"' || current == '\'') { 195 state = InString; 196 stringType = current; 197 } else if (isIdentLetter(current)) { 198 record16(current); 199 state = InIdentifier; 200 } else if (current == '0') { 201 record8(current); 202 state = InNum0; 203 } else if (isDecimalDigit(current)) { 204 record8(current); 205 state = InNum; 206 } else if (current == '.' && isDecimalDigit(next1)) { 207 record8(current); 208 state = InDecimal; 209 #ifndef KJS_PURE_ECMA 210 // <!-- marks the beginning of a line comment (for www usage) 211 } else if (bol && current == '<' && next1 == '!' && 212 next2 == '-' && next3 == '-') { 213 shift(3); 214 state = InSingleLineComment; 215 // same of --> 215 // <!-- marks the beginning of a line comment (for www usage) 216 } else if (current == '<' && next1 == '!' 
&& 217 next2 == '-' && next3 == '-') { 218 shift(3); 219 state = InSingleLineComment; 220 // same for --> 216 221 } else if (bol && current == '-' && next1 == '-' && next2 == '>') { 217 218 222 shift(2); 223 state = InSingleLineComment; 219 224 #endif 220 225 } else { 221 222 223 224 225 //cerr << "encountered unknown character" << endl;226 227 226 token = matchPunctuator(current, next1, next2, next3); 227 if (token != -1) { 228 setDone(Other); 229 } else { 230 // cerr << "encountered unknown character" << endl; 231 setDone(Bad); 232 } 228 233 } 229 234 break; 230 235 case InString: 231 236 if (current == stringType) { 232 233 237 shift(1); 238 setDone(String); 234 239 } else if (current == 0 || isLineTerminator()) { 235 240 setDone(Bad); 236 241 } else if (current == '\\') { 237 242 state = InEscapeSequence; 238 243 } else { 239 244 record16(current); 240 245 } 241 246 break; … … 243 248 case InEscapeSequence: 244 249 if (isOctalDigit(current)) { 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 250 if (current >= '0' && current <= '3' && 251 isOctalDigit(next1) && isOctalDigit(next2)) { 252 record16(convertOctal(current, next1, next2)); 253 shift(2); 254 state = InString; 255 } else if (isOctalDigit(current) && isOctalDigit(next1)) { 256 record16(convertOctal('0', current, next1)); 257 shift(1); 258 state = InString; 259 } else if (isOctalDigit(current)) { 260 record16(convertOctal('0', '0', current)); 261 state = InString; 262 } else { 263 setDone(Bad); 264 } 260 265 } else if (current == 'x') 261 266 state = InHexEscape; 262 267 else if (current == 'u') 263 268 state = InUnicodeEscape; 264 269 else { 265 266 270 record16(singleEscape(current)); 271 state = InString; 267 272 } 268 273 break; 269 274 case InHexEscape: 270 275 if (isHexDigit(current) && isHexDigit(next1)) { 271 272 273 276 state = InString; 277 record16(convertHex(current, next1)); 278 shift(1); 274 279 } else if (current == stringType) { 275 276 277 280 record16('x'); 281 shift(1); 282 
setDone(String); 278 283 } else { 279 280 281 284 record16('x'); 285 record16(current); 286 state = InString; 282 287 } 283 288 break; 284 289 case InUnicodeEscape: 285 290 if (isHexDigit(current) && isHexDigit(next1) && 286 287 288 289 291 isHexDigit(next2) && isHexDigit(next3)) { 292 record16(convertUnicode(current, next1, next2, next3)); 293 shift(3); 294 state = InString; 290 295 } else if (current == stringType) { 291 292 293 296 record16('u'); 297 shift(1); 298 setDone(String); 294 299 } else { 295 300 setDone(Bad); 296 301 } 297 302 break; 298 303 case InSingleLineComment: 299 304 if (isLineTerminator()) { 300 yylineno++; 301 terminator = true; 302 #ifndef KJS_PURE_ECMA 303 bol = true; 304 #endif 305 if (restrKeyword) { 306 token = ';'; 307 setDone(Other); 308 } else 309 state = Start; 305 nextLine(); 306 terminator = true; 307 if (restrKeyword) { 308 token = ';'; 309 setDone(Other); 310 } else 311 state = Start; 310 312 } else if (current == 0) { 311 313 setDone(Eof); 312 314 } 313 315 break; 314 316 case InMultiLineComment: 315 317 if (current == 0) { 316 318 setDone(Bad); 317 319 } else if (isLineTerminator()) { 318 yylineno++;320 nextLine(); 319 321 } else if (current == '*' && next1 == '/') { 320 321 322 state = Start; 323 shift(1); 322 324 } 323 325 break; 324 326 case InIdentifier: 325 327 if (isIdentLetter(current) || isDecimalDigit(current)) { 326 327 328 record16(current); 329 break; 328 330 } 329 331 setDone(Identifier); … … 331 333 case InNum0: 332 334 if (current == 'x' || current == 'X') { 333 334 335 record8(current); 336 state = InHex; 335 337 } else if (current == '.') { 336 337 338 record8(current); 339 state = InDecimal; 338 340 } else if (current == 'e' || current == 'E') { 339 340 341 record8(current); 342 state = InExponentIndicator; 341 343 } else if (isOctalDigit(current)) { 342 343 344 record8(current); 345 state = InOctal; 344 346 } else if (isDecimalDigit(current)) { 345 347 record8(current); 346 348 state = InDecimal; 347 349 } 
else { 348 350 setDone(Number); 349 351 } 350 352 break; 351 353 case InHex: 352 354 if (isHexDigit(current)) { 353 355 record8(current); 354 356 } else { 355 357 setDone(Hex); 356 358 } 357 359 break; 358 360 case InOctal: 359 361 if (isOctalDigit(current)) { 360 362 record8(current); 361 363 } 362 364 else if (isDecimalDigit(current)) { … … 364 366 state = InDecimal; 365 367 } else 366 368 setDone(Octal); 367 369 break; 368 370 case InNum: 369 371 if (isDecimalDigit(current)) { 370 372 record8(current); 371 373 } else if (current == '.') { 372 373 374 record8(current); 375 state = InDecimal; 374 376 } else if (current == 'e' || current == 'E') { 375 376 377 record8(current); 378 state = InExponentIndicator; 377 379 } else 378 380 setDone(Number); 379 381 break; 380 382 case InDecimal: 381 383 if (isDecimalDigit(current)) { 382 384 record8(current); 383 385 } else if (current == 'e' || current == 'E') { 384 385 386 record8(current); 387 state = InExponentIndicator; 386 388 } else 387 389 setDone(Number); 388 390 break; 389 391 case InExponentIndicator: 390 392 if (current == '+' || current == '-') { 391 393 record8(current); 392 394 } else if (isDecimalDigit(current)) { 393 394 395 record8(current); 396 state = InExponent; 395 397 } else 396 398 setDone(Bad); 397 399 break; 398 400 case InExponent: 399 401 if (isDecimalDigit(current)) { 400 402 record8(current); 401 403 } else 402 404 setDone(Number); 403 405 break; 404 406 default: … … 506 508 507 509 if (token == CONTINUE || token == BREAK || 508 510 token == RETURN || token == THROW) 509 511 restrKeyword = true; 510 512 break; … … 531 533 { 532 534 return (current == ' ' || current == '\t' || 533 535 current == 0x0b || current == 0x0c); 534 536 } 535 537 … … 549 551 /* TODO: allow other legitimate unicode chars */ 550 552 return (c >= 'a' && c <= 'z' || 551 552 553 c >= 'A' && c <= 'Z' || 554 c == '$' || c == '_'); 553 555 } 554 556 … … 561 563 { 562 564 return (c >= '0' && c <= '9' || 563 564 565 c >= 'a' && c 
<= 'f' || 566 c >= 'A' && c <= 'F'); 565 567 } 566 568 … … 571 573 572 574 int Lexer::matchPunctuator(unsigned short c1, unsigned short c2, 573 575 unsigned short c3, unsigned short c4) 574 576 { 575 577 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { … … 736 738 { 737 739 return UChar((convertHex(c1) << 4) + convertHex(c2), 738 740 (convertHex(c3) << 4) + convertHex(c4)); 739 741 } 740 742
Note: See TracChangeset for help on using the changeset viewer.