Changeset 43358 in webkit for trunk/JavaScriptCore/parser/Lexer.cpp
- Timestamp: May 7, 2009, 1:03:48 PM (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/JavaScriptCore/parser/Lexer.cpp
r43156 r43358 57 57 static const UChar byteOrderMark = 0xFEFF; 58 58 59 // Values for m_skipLineEnd.60 static const unsigned char SkipLFShift = 0;61 static const unsigned char SkipCRShift = 1;62 static const unsigned char SkipLF = 1 << SkipLFShift;63 static const unsigned char SkipCR = 1 << SkipCRShift;64 65 59 Lexer::Lexer(JSGlobalData* globalData) 66 60 : m_isReparsing(false) … … 77 71 } 78 72 73 inline const UChar* Lexer::currentCharacter() const 74 { 75 return m_code - 4; 76 } 77 79 78 inline int Lexer::currentOffset() const 80 79 { 81 return m_code - 4- m_codeStart;80 return currentCharacter() - m_codeStart; 82 81 } 83 82 … … 155 154 m_code = data + source.startOffset(); 156 155 m_codeEnd = data + source.endOffset(); 157 m_skipLineEnd = 0;158 156 m_error = false; 159 157 m_atLineStart = true; … … 194 192 } 195 193 196 // called on each new line 197 void Lexer::nextLine() 198 { 194 void Lexer::shiftLineTerminator() 195 { 196 ASSERT(isLineTerminator(m_current)); 197 198 // Allow both CRLF and LFCR. 
199 if (m_current + m_next1 == '\n' + '\r') 200 shift2(); 201 else 202 shift1(); 203 199 204 ++m_lineNumber; 200 m_atLineStart = true; 201 } 202 203 void Lexer::setDone(State s) 204 { 205 m_state = s; 206 m_done = true; 207 } 208 209 ALWAYS_INLINE JSC::Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer) 210 { 211 m_identifiers.append(JSC::Identifier(m_globalData, buffer.data(), buffer.size())); 205 } 206 207 ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length) 208 { 209 m_identifiers.append(Identifier(m_globalData, characters, length)); 212 210 return &m_identifiers.last(); 213 }214 215 ALWAYS_INLINE int Lexer::matchPunctuator(int& charPos)216 {217 switch (m_current) {218 case '>':219 if (m_next1 == '>' && m_next2 == '>') {220 if (m_next3 == '=') {221 shift4();222 return URSHIFTEQUAL;223 }224 shift3();225 return URSHIFT;226 }227 if (m_next1 == '>') {228 if (m_next2 == '=') {229 shift3();230 return RSHIFTEQUAL;231 }232 shift2();233 return RSHIFT;234 }235 if (m_next1 == '=') {236 shift2();237 return GE;238 }239 shift1();240 return '>';241 case '=':242 if (m_next1 == '=') {243 if (m_next2 == '=') {244 shift3();245 return STREQ;246 }247 shift2();248 return EQEQ;249 }250 shift1();251 return '=';252 case '!':253 if (m_next1 == '=') {254 if (m_next2 == '=') {255 shift3();256 return STRNEQ;257 }258 shift2();259 return NE;260 }261 shift1();262 return '!';263 case '<':264 if (m_next1 == '<') {265 if (m_next2 == '=') {266 shift3();267 return LSHIFTEQUAL;268 }269 shift2();270 return LSHIFT;271 }272 if (m_next1 == '=') {273 shift2();274 return LE;275 }276 shift1();277 return '<';278 case '+':279 if (m_next1 == '+') {280 shift2();281 if (m_terminator)282 return AUTOPLUSPLUS;283 return PLUSPLUS;284 }285 if (m_next1 == '=') {286 shift2();287 return PLUSEQUAL;288 }289 shift1();290 return '+';291 case '-':292 if (m_next1 == '-') {293 shift2();294 if (m_terminator)295 return AUTOMINUSMINUS;296 return MINUSMINUS;297 }298 if (m_next1 == 
'=') {299 shift2();300 return MINUSEQUAL;301 }302 shift1();303 return '-';304 case '*':305 if (m_next1 == '=') {306 shift2();307 return MULTEQUAL;308 }309 shift1();310 return '*';311 case '/':312 if (m_next1 == '=') {313 shift2();314 return DIVEQUAL;315 }316 shift1();317 return '/';318 case '&':319 if (m_next1 == '&') {320 shift2();321 return AND;322 }323 if (m_next1 == '=') {324 shift2();325 return ANDEQUAL;326 }327 shift1();328 return '&';329 case '^':330 if (m_next1 == '=') {331 shift2();332 return XOREQUAL;333 }334 shift1();335 return '^';336 case '%':337 if (m_next1 == '=') {338 shift2();339 return MODEQUAL;340 }341 shift1();342 return '%';343 case '|':344 if (m_next1 == '=') {345 shift2();346 return OREQUAL;347 }348 if (m_next1 == '|') {349 shift2();350 return OR;351 }352 shift1();353 return '|';354 case ',':355 shift1();356 return ',';357 case '~':358 shift1();359 return '~';360 case '?':361 shift1();362 return '?';363 case ':':364 shift1();365 return ':';366 case '.':367 shift1();368 return '.';369 case '(':370 shift1();371 return '(';372 case ')':373 shift1();374 return ')';375 case '[':376 shift1();377 return '[';378 case ']':379 shift1();380 return ']';381 case ';':382 shift1();383 return ';';384 case '{':385 charPos = currentOffset();386 shift1();387 return OPENBRACE;388 case '}':389 charPos = currentOffset();390 shift1();391 return CLOSEBRACE;392 }393 394 return -1;395 }396 397 ALWAYS_INLINE bool Lexer::isLineTerminator()398 {399 bool cr = m_current == '\r';400 bool lf = m_current == '\n';401 m_skipLineEnd |= (cr << SkipLFShift) | (lf << SkipCRShift);402 return cr | lf | ((m_current & ~1) == 0x2028);403 211 } 404 212 … … 429 237 } 430 238 431 static in t singleEscape(int c)239 static inline int singleEscape(int c) 432 240 { 433 241 switch (c) { … … 444 252 case 'r': 445 253 return 0x0D; 446 case '"':447 return 0x22;448 case '\'':449 return 0x27;450 case '\\':451 return 0x5C;452 254 default: 453 255 return c; 454 256 } 455 }456 457 static inline int 
convertOctal(int c1, int c2, int c3)458 {459 return (c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0';460 257 } 461 258 … … 467 264 } 468 265 266 inline void Lexer::record16(UChar c) 267 { 268 m_buffer16.append(c); 269 } 270 469 271 inline void Lexer::record16(int c) 470 272 { … … 474 276 } 475 277 476 inline void Lexer::record16(UChar c)477 {478 m_buffer16.append(c);479 }480 481 278 int Lexer::lex(void* p1, void* p2) 482 279 { 280 ASSERT(!m_error); 281 ASSERT(m_buffer8.isEmpty()); 282 ASSERT(m_buffer16.isEmpty()); 283 483 284 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); 484 285 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); 485 286 int token = 0; 486 m_state = Start;487 int stringType = 0; // either single or double quotes488 m_buffer8.resize(0);489 m_buffer16.resize(0);490 m_done = false;491 287 m_terminator = false; 492 m_skipLineEnd = 0; 288 289 start: 290 while (isWhiteSpace(m_current)) 291 shift1(); 493 292 494 293 int startOffset = currentOffset(); 495 294 496 while (true) { 497 if (m_skipLineEnd) { 498 if (m_current != '\n') // found \r but not \n afterwards 499 m_skipLineEnd &= ~SkipLF; 500 if (m_current != '\r') // found \n but not \r afterwards 501 m_skipLineEnd &= ~SkipCR; 502 if (m_skipLineEnd) { // found \r\n or \n\r -> eat the second one 503 m_skipLineEnd = 0; 504 shift1(); 505 } 295 if (m_current == -1) { 296 if (!m_terminator && !m_delimited && !m_isReparsing) { 297 // automatic semicolon insertion if program incomplete 298 token = ';'; 299 goto doneSemicolon; 506 300 } 507 switch (m_state) { 508 case Start: 509 startOffset = currentOffset(); 510 if (isWhiteSpace(m_current)) { 511 // do nothing 512 } else if (m_current == '/' && m_next1 == '/') { 513 shift1(); 514 m_state = InSingleLineComment; 515 } else if (m_current == '/' && m_next1 == '*') { 516 shift1(); 517 m_state = InMultiLineComment; 518 } else if (m_current == -1) { 519 if (!m_terminator && !m_delimited && !m_isReparsing) { 520 // automatic semicolon insertion if program incomplete 521 token = 
';'; 522 setDone(Other); 523 } else 524 setDone(Eof); 525 } else if (isLineTerminator()) { 526 nextLine(); 527 m_terminator = true; 528 if (lastTokenWasRestrKeyword()) { 529 token = ';'; 530 setDone(Other); 531 } 532 } else if (m_current == '"' || m_current == '\'') { 533 m_state = InString; 534 stringType = m_current; 535 } else if (isIdentStart(m_current)) { 536 record16(m_current); 537 m_state = InIdentifierOrKeyword; 538 } else if (m_current == '\\') 539 m_state = InIdentifierStartUnicodeEscapeStart; 540 else if (m_current == '0') { 541 record8(m_current); 542 m_state = InNum0; 543 } else if (isASCIIDigit(m_current)) { 544 record8(m_current); 545 m_state = InNum; 546 } else if (m_current == '.' && isASCIIDigit(m_next1)) { 547 record8(m_current); 548 m_state = InDecimal; 549 } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { 550 // <!-- marks the beginning of a line comment (for www usage) 551 shift3(); 552 m_state = InSingleLineComment; 553 } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') { 554 // same for --> 555 shift2(); 556 m_state = InSingleLineComment; 557 } else { 558 token = matchPunctuator(lvalp->intValue); 559 if (token != -1) 560 setDone(Other); 561 else 562 setDone(Bad); 563 } 564 goto stillAtLineStart; 565 case InString: 566 if (m_current == stringType) { 567 shift1(); 568 setDone(String); 569 } else if (isLineTerminator() || m_current == -1) 570 setDone(Bad); 571 else if (m_current == '\\') 572 m_state = InEscapeSequence; 573 else 574 record16(m_current); 575 break; 576 // Escape Sequences inside of strings 577 case InEscapeSequence: 578 if (isASCIIOctalDigit(m_current)) { 579 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) { 580 record16(convertOctal(m_current, m_next1, m_next2)); 581 shift2(); 582 m_state = InString; 583 } else if (isASCIIOctalDigit(m_current) && isASCIIOctalDigit(m_next1)) { 584 record16(convertOctal('0', 
m_current, m_next1)); 585 shift1(); 586 m_state = InString; 587 } else if (isASCIIOctalDigit(m_current)) { 588 record16(convertOctal('0', '0', m_current)); 589 m_state = InString; 590 } else 591 setDone(Bad); 592 } else if (m_current == 'x') 593 m_state = InHexEscape; 594 else if (m_current == 'u') 595 m_state = InUnicodeEscape; 596 else if (isLineTerminator()) { 597 nextLine(); 598 m_state = InString; 599 } else { 600 record16(singleEscape(m_current)); 601 m_state = InString; 602 } 603 break; 604 case InHexEscape: 605 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) { 606 m_state = InString; 607 record16(convertHex(m_current, m_next1)); 608 shift1(); 609 } else if (m_current == stringType) { 610 record16('x'); 611 shift1(); 612 setDone(String); 613 } else { 614 record16('x'); 615 record16(m_current); 616 m_state = InString; 617 } 618 break; 619 case InUnicodeEscape: 620 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) { 621 record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); 622 shift3(); 623 m_state = InString; 624 } else if (m_current == stringType) { 625 record16('u'); 626 shift1(); 627 setDone(String); 628 } else 629 setDone(Bad); 630 break; 631 case InSingleLineComment: 632 if (isLineTerminator()) { 633 nextLine(); 634 m_terminator = true; 635 if (lastTokenWasRestrKeyword()) { 636 token = ';'; 637 setDone(Other); 638 } else 639 m_state = Start; 640 } else if (m_current == -1) 641 setDone(Eof); 642 goto stillAtLineStart; 643 case InMultiLineComment: 644 if (isLineTerminator()) 645 nextLine(); 646 else if (m_current == '*' && m_next1 == '/') { 647 m_state = Start; 648 shift1(); 649 } else if (m_current == -1) 650 setDone(Bad); 651 break; 652 case InIdentifierOrKeyword: 653 if (isIdentPart(m_current)) { 654 record16(m_current); 655 while (isIdentPart(m_next1)) { 656 shift1(); 657 record16(m_current); 658 } 659 } else if (m_current == '\\') 660 m_state = 
InIdentifierPartUnicodeEscapeStart; 661 else 662 setDone(IdentifierOrKeyword); 663 break; 664 case InIdentifier: 665 if (isIdentPart(m_current)) { 666 record16(m_current); 667 while (isIdentPart(m_next1)) { 668 shift1(); 669 record16(m_current); 670 } 671 } else if (m_current == '\\') 672 m_state = InIdentifierPartUnicodeEscapeStart; 673 else 674 setDone(Identifier); 675 break; 676 case InNum0: 677 if (m_current == 'x' || m_current == 'X') { 678 record8(m_current); 679 m_state = InHex; 680 } else if (m_current == '.') { 681 record8(m_current); 682 m_state = InDecimal; 683 } else if (m_current == 'e' || m_current == 'E') { 684 record8(m_current); 685 m_state = InExponentIndicator; 686 } else if (isASCIIOctalDigit(m_current)) { 687 record8(m_current); 688 m_state = InOctal; 689 } else if (isASCIIDigit(m_current)) { 690 record8(m_current); 691 m_state = InDecimal; 692 } else 693 setDone(Number); 694 break; 695 case InHex: 696 if (isASCIIHexDigit(m_current)) { 697 record8(m_current); 698 while (isASCIIHexDigit(m_next1)) { 699 shift1(); 700 record8(m_current); 701 } 702 } else 703 setDone(Hex); 704 break; 705 case InOctal: 706 if (isASCIIOctalDigit(m_current)) { 707 record8(m_current); 708 while (isASCIIOctalDigit(m_next1)) { 709 shift1(); 710 record8(m_current); 711 } 712 } else if (isASCIIDigit(m_current)) { 713 record8(m_current); 714 m_state = InDecimal; 715 } else 716 setDone(Octal); 717 break; 718 case InNum: 719 if (isASCIIDigit(m_current)) { 720 record8(m_current); 721 while (isASCIIDigit(m_next1)) { 722 shift1(); 723 record8(m_current); 724 } 725 } else if (m_current == '.') { 726 record8(m_current); 727 m_state = InDecimal; 728 } else if (m_current == 'e' || m_current == 'E') { 729 record8(m_current); 730 m_state = InExponentIndicator; 731 } else 732 setDone(Number); 733 break; 734 case InDecimal: 735 if (isASCIIDigit(m_current)) { 736 record8(m_current); 737 while (isASCIIDigit(m_next1)) { 738 shift1(); 739 record8(m_current); 740 } 741 } else if (m_current 
== 'e' || m_current == 'E') { 742 record8(m_current); 743 m_state = InExponentIndicator; 744 } else 745 setDone(Number); 746 break; 747 case InExponentIndicator: 748 if (m_current == '+' || m_current == '-') 749 record8(m_current); 750 else if (isASCIIDigit(m_current)) { 751 record8(m_current); 752 m_state = InExponent; 753 } else 754 setDone(Bad); 755 break; 756 case InExponent: 757 if (isASCIIDigit(m_current)) 758 record8(m_current); 759 else 760 setDone(Number); 761 break; 762 case InIdentifierStartUnicodeEscapeStart: 763 if (m_current == 'u') 764 m_state = InIdentifierStartUnicodeEscape; 765 else 766 setDone(Bad); 767 break; 768 case InIdentifierPartUnicodeEscapeStart: 769 if (m_current == 'u') 770 m_state = InIdentifierPartUnicodeEscape; 771 else 772 setDone(Bad); 773 break; 774 case InIdentifierStartUnicodeEscape: 775 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)) { 776 setDone(Bad); 301 return 0; 302 } 303 304 m_delimited = false; 305 switch (m_current) { 306 case '>': 307 if (m_next1 == '>' && m_next2 == '>') { 308 if (m_next3 == '=') { 309 shift4(); 310 token = URSHIFTEQUAL; 777 311 break; 778 312 } 779 token = convertUnicode(m_current, m_next1, m_next2, m_next3);780 313 shift3(); 781 if (!isIdentStart(token)) { 782 setDone(Bad); 314 token = URSHIFT; 315 break; 316 } 317 if (m_next1 == '>') { 318 if (m_next2 == '=') { 319 shift3(); 320 token = RSHIFTEQUAL; 783 321 break; 784 322 } 785 record16(token); 786 m_state = InIdentifier; 787 break; 788 case InIdentifierPartUnicodeEscape: 789 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)) { 790 setDone(Bad); 323 shift2(); 324 token = RSHIFT; 325 break; 326 } 327 if (m_next1 == '=') { 328 shift2(); 329 token = GE; 330 break; 331 } 332 shift1(); 333 token = '>'; 334 break; 335 case '=': 336 if (m_next1 == '=') { 337 if (m_next2 == '=') { 338 shift3(); 339 token = STREQ; 791 
340 break; 792 341 } 793 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 794 shift3(); 795 if (!isIdentPart(token)) { 796 setDone(Bad); 342 shift2(); 343 token = EQEQ; 344 break; 345 } 346 shift1(); 347 token = '='; 348 break; 349 case '!': 350 if (m_next1 == '=') { 351 if (m_next2 == '=') { 352 shift3(); 353 token = STRNEQ; 797 354 break; 798 355 } 799 record16(token); 800 m_state = InIdentifier; 801 break; 802 default: 803 ASSERT_NOT_REACHED(); 356 shift2(); 357 token = NE; 358 break; 359 } 360 shift1(); 361 token = '!'; 362 break; 363 case '<': 364 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { 365 // <!-- marks the beginning of a line comment (for www usage) 366 shift4(); 367 goto inSingleLineComment; 368 } 369 if (m_next1 == '<') { 370 if (m_next2 == '=') { 371 shift3(); 372 token = LSHIFTEQUAL; 373 break; 374 } 375 shift2(); 376 token = LSHIFT; 377 break; 378 } 379 if (m_next1 == '=') { 380 shift2(); 381 token = LE; 382 break; 383 } 384 shift1(); 385 token = '<'; 386 break; 387 case '+': 388 if (m_next1 == '+') { 389 shift2(); 390 if (m_terminator) { 391 token = AUTOPLUSPLUS; 392 break; 393 } 394 token = PLUSPLUS; 395 break; 396 } 397 if (m_next1 == '=') { 398 shift2(); 399 token = PLUSEQUAL; 400 break; 401 } 402 shift1(); 403 token = '+'; 404 break; 405 case '-': 406 if (m_next1 == '-') { 407 if (m_atLineStart && m_next2 == '>') { 408 shift3(); 409 goto inSingleLineComment; 410 } 411 shift2(); 412 if (m_terminator) { 413 token = AUTOMINUSMINUS; 414 break; 415 } 416 token = MINUSMINUS; 417 break; 418 } 419 if (m_next1 == '=') { 420 shift2(); 421 token = MINUSEQUAL; 422 break; 423 } 424 shift1(); 425 token = '-'; 426 break; 427 case '*': 428 if (m_next1 == '=') { 429 shift2(); 430 token = MULTEQUAL; 431 break; 432 } 433 shift1(); 434 token = '*'; 435 break; 436 case '/': 437 if (m_next1 == '/') { 438 shift2(); 439 goto inSingleLineComment; 440 } 441 if (m_next1 == '*') 442 goto inMultiLineComment; 443 if (m_next1 == '=') { 444 
shift2(); 445 token = DIVEQUAL; 446 break; 447 } 448 shift1(); 449 token = '/'; 450 break; 451 case '&': 452 if (m_next1 == '&') { 453 shift2(); 454 token = AND; 455 break; 456 } 457 if (m_next1 == '=') { 458 shift2(); 459 token = ANDEQUAL; 460 break; 461 } 462 shift1(); 463 token = '&'; 464 break; 465 case '^': 466 if (m_next1 == '=') { 467 shift2(); 468 token = XOREQUAL; 469 break; 470 } 471 shift1(); 472 token = '^'; 473 break; 474 case '%': 475 if (m_next1 == '=') { 476 shift2(); 477 token = MODEQUAL; 478 break; 479 } 480 shift1(); 481 token = '%'; 482 break; 483 case '|': 484 if (m_next1 == '=') { 485 shift2(); 486 token = OREQUAL; 487 break; 488 } 489 if (m_next1 == '|') { 490 shift2(); 491 token = OR; 492 break; 493 } 494 shift1(); 495 token = '|'; 496 break; 497 case '.': 498 if (isASCIIDigit(m_next1)) { 499 record8('.'); 500 shift1(); 501 goto inNumberAfterDecimalPoint; 502 } 503 token = '.'; 504 shift1(); 505 break; 506 case ',': 507 case '~': 508 case '?': 509 case ':': 510 case '(': 511 case ')': 512 case '[': 513 case ']': 514 token = m_current; 515 shift1(); 516 break; 517 case ';': 518 shift1(); 519 m_delimited = true; 520 token = ';'; 521 break; 522 case '{': 523 lvalp->intValue = currentOffset(); 524 shift1(); 525 token = OPENBRACE; 526 break; 527 case '}': 528 lvalp->intValue = currentOffset(); 529 shift1(); 530 m_delimited = true; 531 token = CLOSEBRACE; 532 break; 533 case '\\': 534 goto startIdentifierWithBackslash; 535 case '0': 536 goto startNumberWithZeroDigit; 537 case '1': 538 case '2': 539 case '3': 540 case '4': 541 case '5': 542 case '6': 543 case '7': 544 case '8': 545 case '9': 546 goto startNumber; 547 case '"': 548 case '\'': 549 goto startString; 550 default: 551 if (isIdentStart(m_current)) 552 goto startIdentifierOrKeyword; 553 if (isLineTerminator(m_current)) { 554 shiftLineTerminator(); 555 m_atLineStart = true; 556 m_terminator = true; 557 if (lastTokenWasRestrKeyword()) { 558 token = ';'; 559 goto doneSemicolon; 560 } 561 
goto start; 562 } 563 goto returnError; 564 } 565 566 m_atLineStart = false; 567 goto returnToken; 568 569 startString: { 570 int stringQuoteCharacter = m_current; 571 shift1(); 572 573 const UChar* stringStart = currentCharacter(); 574 while (m_current != stringQuoteCharacter) { 575 // Fast check for characters that require special handling. 576 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently 577 // as possible, and lets through all common ASCII characters. 578 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) 579 m_buffer16.append(stringStart, currentCharacter() - stringStart); 580 goto inString; 581 shift1(); 582 } 583 lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart); 584 shift1(); 585 m_atLineStart = false; 586 m_delimited = false; 587 token = STRING; 588 goto returnToken; 589 590 inString: 591 while (m_current != stringQuoteCharacter) { 592 if (m_current == '\\') 593 goto inStringEscapeSequence; 594 if (UNLIKELY(isLineTerminator(m_current))) 595 goto returnError; 596 if (UNLIKELY(m_current == -1)) 597 goto returnError; 598 record16(m_current); 599 shift1(); 600 } 601 goto doneString; 602 603 inStringEscapeSequence: 604 shift1(); 605 if (m_current == 'x') { 606 shift1(); 607 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) { 608 record16(convertHex(m_current, m_next1)); 609 shift2(); 610 goto inString; 804 611 } 805 806 m_atLineStart = false; 807 808 stillAtLineStart: 809 if (m_done) 810 break; 811 812 shift1(); 813 } 814 815 if (m_state == Number || m_state == Octal || m_state == Hex) { 816 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. 
817 if (isIdentStart(m_current)) 818 m_state = Bad; 612 record16('x'); 613 if (m_current == stringQuoteCharacter) 614 goto doneString; 615 goto inString; 616 } 617 if (m_current == 'u') { 618 shift1(); 619 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) { 620 record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); 621 shift4(); 622 goto inString; 623 } 624 if (m_current == stringQuoteCharacter) { 625 record16('u'); 626 goto doneString; 627 } 628 goto returnError; 629 } 630 if (isASCIIOctalDigit(m_current)) { 631 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) { 632 record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0'); 633 shift3(); 634 goto inString; 635 } 636 if (isASCIIOctalDigit(m_next1)) { 637 record16((m_current - '0') * 8 + m_next1 - '0'); 638 shift2(); 639 goto inString; 640 } 641 record16(m_current - '0'); 642 shift1(); 643 goto inString; 644 } 645 if (isLineTerminator(m_current)) { 646 shiftLineTerminator(); 647 goto inString; 648 } 649 record16(singleEscape(m_current)); 650 shift1(); 651 goto inString; 652 } 653 654 startIdentifierWithBackslash: 655 shift1(); 656 if (UNLIKELY(m_current != 'u')) 657 goto returnError; 658 shift1(); 659 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) 660 goto returnError; 661 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 662 if (UNLIKELY(!isIdentStart(token))) 663 goto returnError; 664 goto inIdentifierAfterCharacterCheck; 665 666 startIdentifierOrKeyword: { 667 const UChar* identifierStart = currentCharacter(); 668 shift1(); 669 while (isIdentPart(m_current)) 670 shift1(); 671 if (LIKELY(m_current != '\\')) { 672 lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart); 673 goto doneIdentifierOrKeyword; 674 } 675 m_buffer16.append(identifierStart, 
currentCharacter() - identifierStart); 676 } 677 678 do { 679 shift1(); 680 if (UNLIKELY(m_current != 'u')) 681 goto returnError; 682 shift1(); 683 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) 684 goto returnError; 685 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 686 if (UNLIKELY(!isIdentPart(token))) 687 goto returnError; 688 inIdentifierAfterCharacterCheck: 689 record16(token); 690 shift4(); 691 692 while (isIdentPart(m_current)) { 693 record16(m_current); 694 shift1(); 695 } 696 } while (UNLIKELY(m_current == '\\')); 697 goto doneIdentifier; 698 699 inSingleLineComment: 700 while (!isLineTerminator(m_current)) { 701 if (UNLIKELY(m_current == -1)) 702 return 0; 703 shift1(); 704 } 705 shiftLineTerminator(); 706 m_atLineStart = true; 707 m_terminator = true; 708 if (lastTokenWasRestrKeyword()) 709 goto doneSemicolon; 710 goto start; 711 712 inMultiLineComment: 713 shift2(); 714 while (m_current != '*' || m_next1 != '/') { 715 if (isLineTerminator(m_current)) 716 shiftLineTerminator(); 819 717 else { 820 // terminate string 821 m_buffer8.append('\0'); 822 823 if (m_state == Number) 824 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0L); 825 else if (m_state == Hex) { // scan hex numbers 826 double dval = 0; 827 828 const char* p = m_buffer8.data() + 2; 829 while (char c = *p++) { 830 dval *= 16; 831 dval += toASCIIHexValue(c); 832 } 833 834 if (dval >= mantissaOverflowLowerBound) 835 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); 836 837 m_state = Number; 838 lvalp->doubleValue = dval; 839 } else { // scan octal number 840 double dval = 0; 841 842 const char* p = m_buffer8.data() + 1; 843 while (char c = *p++) { 844 dval *= 8; 845 dval += c - '0'; 846 } 847 848 if (dval >= mantissaOverflowLowerBound) 849 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); 850 851 m_state = Number; 852 lvalp->doubleValue 
= dval; 853 } 718 shift1(); 719 if (UNLIKELY(m_current == -1)) 720 goto returnError; 854 721 } 855 722 } 856 723 shift2(); 724 m_atLineStart = false; 725 goto start; 726 727 startNumberWithZeroDigit: 728 shift1(); 729 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) { 730 shift1(); 731 goto inHex; 732 } 733 if (m_current == '.') { 734 record8('0'); 735 record8('.'); 736 shift1(); 737 goto inNumberAfterDecimalPoint; 738 } 739 if ((m_current | 0x20) == 'e') { 740 record8('0'); 741 record8('e'); 742 shift1(); 743 goto inExponentIndicator; 744 } 745 if (isASCIIOctalDigit(m_current)) 746 goto inOctal; 747 if (isASCIIDigit(m_current)) 748 goto startNumber; 749 lvalp->doubleValue = 0; 750 goto doneNumeric; 751 752 inNumberAfterDecimalPoint: 753 while (isASCIIDigit(m_current)) { 754 record8(m_current); 755 shift1(); 756 } 757 if ((m_current | 0x20) == 'e') { 758 record8('e'); 759 shift1(); 760 goto inExponentIndicator; 761 } 762 goto doneNumber; 763 764 inExponentIndicator: 765 if (m_current == '+' || m_current == '-') { 766 record8(m_current); 767 shift1(); 768 } 769 if (!isASCIIDigit(m_current)) 770 goto returnError; 771 do { 772 record8(m_current); 773 shift1(); 774 } while (isASCIIDigit(m_current)); 775 goto doneNumber; 776 777 inOctal: { 778 do { 779 record8(m_current); 780 shift1(); 781 } while (isASCIIOctalDigit(m_current)); 782 if (isASCIIDigit(m_current)) 783 goto startNumber; 784 785 double dval = 0; 786 787 const char* end = m_buffer8.end(); 788 for (const char* p = m_buffer8.data(); p < end; ++p) { 789 dval *= 8; 790 dval += *p - '0'; 791 } 792 if (dval >= mantissaOverflowLowerBound) 793 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8); 794 795 m_buffer8.resize(0); 796 797 lvalp->doubleValue = dval; 798 goto doneNumeric; 799 } 800 801 inHex: { 802 do { 803 record8(m_current); 804 shift1(); 805 } while (isASCIIHexDigit(m_current)); 806 807 double dval = 0; 808 809 const char* end = m_buffer8.end(); 810 for (const char* p = 
m_buffer8.data(); p < end; ++p) { 811 dval *= 16; 812 dval += toASCIIHexValue(*p); 813 } 814 if (dval >= mantissaOverflowLowerBound) 815 dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16); 816 817 m_buffer8.resize(0); 818 819 lvalp->doubleValue = dval; 820 goto doneNumeric; 821 } 822 823 startNumber: 824 record8(m_current); 825 shift1(); 826 while (isASCIIDigit(m_current)) { 827 record8(m_current); 828 shift1(); 829 } 830 if (m_current == '.') { 831 record8('.'); 832 shift1(); 833 goto inNumberAfterDecimalPoint; 834 } 835 if ((m_current | 0x20) == 'e') { 836 record8('e'); 837 shift1(); 838 goto inExponentIndicator; 839 } 840 841 // Fall through into doneNumber. 842 843 doneNumber: 844 // Null-terminate string for strtod. 845 m_buffer8.append('\0'); 846 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0); 847 m_buffer8.resize(0); 848 849 // Fall through into doneNumeric. 850 851 doneNumeric: 852 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. 853 if (UNLIKELY(isIdentStart(m_current))) 854 goto returnError; 855 856 m_atLineStart = false; 857 857 m_delimited = false; 858 858 token = NUMBER; 859 goto returnToken; 860 861 doneSemicolon: 862 token = ';'; 863 m_delimited = true; 864 goto returnToken; 865 866 doneIdentifier: 867 m_atLineStart = false; 868 m_delimited = false; 869 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 870 m_buffer16.resize(0); 871 token = IDENT; 872 goto returnToken; 873 874 doneIdentifierOrKeyword: { 875 m_atLineStart = false; 876 m_delimited = false; 877 m_buffer16.resize(0); 878 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident); 879 token = entry ? entry->lexerValue() : IDENT; 880 goto returnToken; 881 } 882 883 doneString: 884 // Atomize constant strings in case they're later used in property lookup. 
885 shift1(); 886 m_atLineStart = false; 887 m_delimited = false; 888 lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 889 m_buffer16.resize(0); 890 token = STRING; 891 892 // Fall through into returnToken. 893 894 returnToken: { 859 895 int lineNumber = m_lineNumber; 860 896 llocp->first_line = lineNumber; … … 863 899 llocp->last_column = currentOffset(); 864 900 865 switch (m_state) {866 case Eof:867 token = 0;868 break;869 case Other:870 m_delimited = (token == '}') | (token == ';');871 break;872 case Identifier:873 lvalp->ident = makeIdentifier(m_buffer16);874 token = IDENT;875 break;876 case IdentifierOrKeyword: {877 lvalp->ident = makeIdentifier(m_buffer16);878 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);879 if (!entry) {880 // Lookup for keyword failed, means this is an identifier.881 token = IDENT;882 break;883 }884 token = entry->lexerValue();885 break;886 }887 case String:888 // Atomize constant strings in case they're later used in property lookup.889 lvalp->ident = makeIdentifier(m_buffer16);890 token = STRING;891 break;892 case Number:893 token = NUMBER;894 break;895 default:896 ASSERT_NOT_REACHED();897 // Fall through.898 case Bad:899 m_error = true;900 return -1;901 }902 903 901 m_lastToken = token; 904 902 return token; 905 903 } 906 904 905 returnError: 906 m_error = true; 907 return -1; 908 } 909 907 910 bool Lexer::scanRegExp() 908 911 { 909 m_buffer16.resize(0); 912 ASSERT(m_buffer16.isEmpty()); 913 910 914 bool lastWasEscape = false; 911 915 bool inBrackets = false; 912 916 913 while ( 1) {914 if (isLineTerminator( ) || m_current == -1)917 while (true) { 918 if (isLineTerminator(m_current) || m_current == -1) 915 919 return false; 916 elseif (m_current != '/' || lastWasEscape || inBrackets) {920 if (m_current != '/' || lastWasEscape || inBrackets) { 917 921 // keep track of '[' and ']' 918 922 if (!lastWasEscape) { … … 938 942 } 939 943 m_flags = UString(m_buffer16); 944 m_buffer16.resize(0); 
940 945 941 946 return true;
Note: See TracChangeset for help on using the changeset viewer.