Changeset 62366 in webkit for trunk/JavaScriptCore/parser
- Timestamp:
- Jul 1, 2010, 11:17:40 PM (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/parser/Lexer.cpp
r62031 r62366 48 48 static const UChar byteOrderMark = 0xFEFF; 49 49 50 enum CharacterTypes { 51 // Types for the main switch 52 CharacterInvalid, 53 54 CharacterAlpha, 55 CharacterZero, 56 CharacterNumber, 57 58 CharacterLineTerminator, 59 CharacterExclamationMark, 60 CharacterSimple, 61 CharacterQuote, 62 CharacterDot, 63 CharacterSlash, 64 CharacterBackSlash, 65 CharacterSemicolon, 66 CharacterOpenBrace, 67 CharacterCloseBrace, 68 69 CharacterAdd, 70 CharacterSub, 71 CharacterMultiply, 72 CharacterModulo, 73 CharacterAnd, 74 CharacterXor, 75 CharacterOr, 76 CharacterLess, 77 CharacterGreater, 78 CharacterEqual, 79 80 // Other types (only one so far) 81 CharacterWhiteSpace, 82 }; 83 84 // 128 ascii codes 85 static unsigned char AsciiCharacters[128] = { 86 /* 0 - Null */ CharacterInvalid, 87 /* 1 - Start of Heading */ CharacterInvalid, 88 /* 2 - Start of Text */ CharacterInvalid, 89 /* 3 - End of Text */ CharacterInvalid, 90 /* 4 - End of Transm. */ CharacterInvalid, 91 /* 5 - Enquiry */ CharacterInvalid, 92 /* 6 - Acknowledgment */ CharacterInvalid, 93 /* 7 - Bell */ CharacterInvalid, 94 /* 8 - Back Space */ CharacterInvalid, 95 /* 9 - Horizontal Tab */ CharacterWhiteSpace, 96 /* 10 - Line Feed */ CharacterLineTerminator, 97 /* 11 - Vertical Tab */ CharacterWhiteSpace, 98 /* 12 - Form Feed */ CharacterWhiteSpace, 99 /* 13 - Carriage Return */ CharacterLineTerminator, 100 /* 14 - Shift Out */ CharacterInvalid, 101 /* 15 - Shift In */ CharacterInvalid, 102 /* 16 - Data Line Escape */ CharacterInvalid, 103 /* 17 - Device Control 1 */ CharacterInvalid, 104 /* 18 - Device Control 2 */ CharacterInvalid, 105 /* 19 - Device Control 3 */ CharacterInvalid, 106 /* 20 - Device Control 4 */ CharacterInvalid, 107 /* 21 - Negative Ack. */ CharacterInvalid, 108 /* 22 - Synchronous Idle */ CharacterInvalid, 109 /* 23 - End of Transmit */ CharacterInvalid, 110 /* 24 - Cancel */ CharacterInvalid, 111 /* 25 - End of Medium */ CharacterInvalid, 112 /* 26 - Substitute */ CharacterInvalid, 113 /* 27 - Escape */ CharacterInvalid, 114 /* 28 - File Separator */ CharacterInvalid, 115 /* 29 - Group Separator */ CharacterInvalid, 116 /* 30 - Record Separator */ CharacterInvalid, 117 /* 31 - Unit Separator */ CharacterInvalid, 118 /* 32 - Space */ CharacterWhiteSpace, 119 /* 33 - ! */ CharacterExclamationMark, 120 /* 34 - " */ CharacterQuote, 121 /* 35 - # */ CharacterInvalid, 122 /* 36 - $ */ CharacterAlpha, 123 /* 37 - % */ CharacterModulo, 124 /* 38 - & */ CharacterAnd, 125 /* 39 - ' */ CharacterQuote, 126 /* 40 - ( */ CharacterSimple, 127 /* 41 - ) */ CharacterSimple, 128 /* 42 - * */ CharacterMultiply, 129 /* 43 - + */ CharacterAdd, 130 /* 44 - , */ CharacterSimple, 131 /* 45 - - */ CharacterSub, 132 /* 46 - . */ CharacterDot, 133 /* 47 - / */ CharacterSlash, 134 /* 48 - 0 */ CharacterZero, 135 /* 49 - 1 */ CharacterNumber, 136 /* 50 - 2 */ CharacterNumber, 137 /* 51 - 3 */ CharacterNumber, 138 /* 52 - 4 */ CharacterNumber, 139 /* 53 - 5 */ CharacterNumber, 140 /* 54 - 6 */ CharacterNumber, 141 /* 55 - 7 */ CharacterNumber, 142 /* 56 - 8 */ CharacterNumber, 143 /* 57 - 9 */ CharacterNumber, 144 /* 58 - : */ CharacterSimple, 145 /* 59 - ; */ CharacterSemicolon, 146 /* 60 - < */ CharacterLess, 147 /* 61 - = */ CharacterEqual, 148 /* 62 - > */ CharacterGreater, 149 /* 63 - ? */ CharacterSimple, 150 /* 64 - @ */ CharacterInvalid, 151 /* 65 - A */ CharacterAlpha, 152 /* 66 - B */ CharacterAlpha, 153 /* 67 - C */ CharacterAlpha, 154 /* 68 - D */ CharacterAlpha, 155 /* 69 - E */ CharacterAlpha, 156 /* 70 - F */ CharacterAlpha, 157 /* 71 - G */ CharacterAlpha, 158 /* 72 - H */ CharacterAlpha, 159 /* 73 - I */ CharacterAlpha, 160 /* 74 - J */ CharacterAlpha, 161 /* 75 - K */ CharacterAlpha, 162 /* 76 - L */ CharacterAlpha, 163 /* 77 - M */ CharacterAlpha, 164 /* 78 - N */ CharacterAlpha, 165 /* 79 - O */ CharacterAlpha, 166 /* 80 - P */ CharacterAlpha, 167 /* 81 - Q */ CharacterAlpha, 168 /* 82 - R */ CharacterAlpha, 169 /* 83 - S */ CharacterAlpha, 170 /* 84 - T */ CharacterAlpha, 171 /* 85 - U */ CharacterAlpha, 172 /* 86 - V */ CharacterAlpha, 173 /* 87 - W */ CharacterAlpha, 174 /* 88 - X */ CharacterAlpha, 175 /* 89 - Y */ CharacterAlpha, 176 /* 90 - Z */ CharacterAlpha, 177 /* 91 - [ */ CharacterSimple, 178 /* 92 - \ */ CharacterBackSlash, 179 /* 93 - ] */ CharacterSimple, 180 /* 94 - ^ */ CharacterXor, 181 /* 95 - _ */ CharacterAlpha, 182 /* 96 - ` */ CharacterInvalid, 183 /* 97 - a */ CharacterAlpha, 184 /* 98 - b */ CharacterAlpha, 185 /* 99 - c */ CharacterAlpha, 186 /* 100 - d */ CharacterAlpha, 187 /* 101 - e */ CharacterAlpha, 188 /* 102 - f */ CharacterAlpha, 189 /* 103 - g */ CharacterAlpha, 190 /* 104 - h */ CharacterAlpha, 191 /* 105 - i */ CharacterAlpha, 192 /* 106 - j */ CharacterAlpha, 193 /* 107 - k */ CharacterAlpha, 194 /* 108 - l */ CharacterAlpha, 195 /* 109 - m */ CharacterAlpha, 196 /* 110 - n */ CharacterAlpha, 197 /* 111 - o */ CharacterAlpha, 198 /* 112 - p */ CharacterAlpha, 199 /* 113 - q */ CharacterAlpha, 200 /* 114 - r */ CharacterAlpha, 201 /* 115 - s */ CharacterAlpha, 202 /* 116 - t */ CharacterAlpha, 203 /* 117 - u */ CharacterAlpha, 204 /* 118 - v */ CharacterAlpha, 205 /* 119 - w */ CharacterAlpha, 206 /* 120 - x */ CharacterAlpha, 207 /* 121 - y */ CharacterAlpha, 208 /* 122 - z */ CharacterAlpha, 209 /* 123 - { */ CharacterOpenBrace, 210 /* 124 - | */ CharacterOr, 211 /* 125 - } */ CharacterCloseBrace, 212 /* 126 - ~ */ CharacterSimple, 213 /* 127 - Delete */ CharacterInvalid, 214 }; 215 50 216 Lexer::Lexer(JSGlobalData* globalData) 51 217 : m_isReparsing(false) … … 264 430 int startOffset = currentOffset(); 265 431 266 if ( m_current == -1) {432 if (UNLIKELY(m_current == -1)) { 267 433 if (!m_terminator && !m_delimited && !m_isReparsing) { 268 434 // automatic semicolon insertion if program incomplete … … 274 440 275 441 m_delimited = false; 276 switch (m_current) { 277 case '>': 278 shift(); 279 if (m_current == '>') { 442 ASSERT(m_current >= 0); 443 444 if (m_current < 128) { 445 ASSERT(isASCII(m_current)); 446 447 switch (AsciiCharacters[m_current]) { 448 case CharacterGreater: 280 449 shift(); 281 450 if (m_current == '>') { 282 451 shift(); 452 if (m_current == '>') { 453 shift(); 454 if (m_current == '=') { 455 shift(); 456 token = URSHIFTEQUAL; 457 break; 458 } 459 token = URSHIFT; 460 break; 461 } 283 462 if (m_current == '=') { 284 463 shift(); 285 token = URSHIFTEQUAL;464 token = RSHIFTEQUAL; 286 465 break; 287 466 } 288 token = URSHIFT;467 token = RSHIFT; 289 468 break; 290 469 } 291 470 if (m_current == '=') { 292 471 shift(); 293 token = RSHIFTEQUAL; 294 break; 295 } 296 token = RSHIFT; 297 break; 298 } 299 if (m_current == '=') { 300 shift(); 301 token = GE; 302 break; 303 } 304 token = '>'; 305 break; 306 case '=': 307 shift(); 308 if (m_current == '=') { 472 token = GE; 473 break; 474 } 475 token = '>'; 476 break; 477 case CharacterEqual: 309 478 shift(); 310 479 if (m_current == '=') { 311 480 shift(); 312 token = STREQ; 313 break; 314 } 315 token = EQEQ; 316 break; 317 } 318 token = '='; 319 break; 320 case '!': 321 shift(); 322 if (m_current == '=') { 323 shift(); 481 if (m_current == '=') { 482 shift(); 483 token = STREQ; 484 break; 485 } 486 token = EQEQ; 487 break; 488 } 489 token = '='; 490 break; 491 case CharacterLess: 492 shift(); 493 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') { 494 // <!-- marks the beginning of a line comment (for www usage) 495 goto inSingleLineComment; 496 } 497 if (m_current == '<') { 498 shift(); 499 if (m_current == '=') { 500 shift(); 501 token = LSHIFTEQUAL; 502 break; 503 } 504 token = LSHIFT; 505 break; 506 } 324 507 if (m_current == '=') { 325 508 shift(); 326 token = STRNEQ; 327 break; 328 } 329 token = NE; 330 break; 331 } 332 token = '!'; 333 break; 334 case '<': 335 shift(); 336 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') { 337 // <!-- marks the beginning of a line comment (for www usage) 338 goto inSingleLineComment; 339 } 340 if (m_current == '<') { 509 token = LE; 510 break; 511 } 512 token = '<'; 513 break; 514 case CharacterExclamationMark: 341 515 shift(); 342 516 if (m_current == '=') { 343 517 shift(); 344 token = LSHIFTEQUAL; 345 break; 346 } 347 token = LSHIFT; 348 break; 349 } 350 if (m_current == '=') { 351 shift(); 352 token = LE; 353 break; 354 } 355 token = '<'; 356 break; 357 case '+': 358 shift(); 359 if (m_current == '+') { 360 shift(); 361 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS; 362 break; 363 } 364 if (m_current == '=') { 365 shift(); 366 token = PLUSEQUAL; 367 break; 368 } 369 token = '+'; 370 break; 371 case '-': 372 shift(); 373 if (m_current == '-') { 374 shift(); 375 if (m_atLineStart && m_current == '>') { 518 if (m_current == '=') { 519 shift(); 520 token = STRNEQ; 521 break; 522 } 523 token = NE; 524 break; 525 } 526 token = '!'; 527 break; 528 case CharacterAdd: 529 shift(); 530 if (m_current == '+') { 531 shift(); 532 token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS; 533 break; 534 } 535 if (m_current == '=') { 536 shift(); 537 token = PLUSEQUAL; 538 break; 539 } 540 token = '+'; 541 break; 542 case CharacterSub: 543 shift(); 544 if (m_current == '-') { 545 shift(); 546 if (m_atLineStart && m_current == '>') { 547 shift(); 548 goto inSingleLineComment; 549 } 550 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS; 551 break; 552 } 553 if (m_current == '=') { 554 shift(); 555 token = MINUSEQUAL; 556 break; 557 } 558 token = '-'; 559 break; 560 case CharacterMultiply: 561 shift(); 562 if (m_current == '=') { 563 shift(); 564 token = MULTEQUAL; 565 break; 566 } 567 token = '*'; 568 break; 569 case CharacterSlash: 570 shift(); 571 if (m_current == '/') { 376 572 shift(); 377 573 goto inSingleLineComment; 378 574 } 379 token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS; 380 break; 381 } 382 if (m_current == '=') { 383 shift(); 384 token = MINUSEQUAL; 385 break; 386 } 387 token = '-'; 388 break; 389 case '*': 390 shift(); 391 if (m_current == '=') { 392 shift(); 393 token = MULTEQUAL; 394 break; 395 } 396 token = '*'; 397 break; 398 case '/': 399 shift(); 400 if (m_current == '/') { 401 shift(); 402 goto inSingleLineComment; 403 } 404 if (m_current == '*') { 405 shift(); 406 goto inMultiLineComment; 407 } 408 if (m_current == '=') { 409 shift(); 410 token = DIVEQUAL; 411 break; 412 } 413 token = '/'; 414 break; 415 case '&': 416 shift(); 417 if (m_current == '&') { 418 shift(); 419 token = AND; 420 break; 421 } 422 if (m_current == '=') { 423 shift(); 424 token = ANDEQUAL; 425 break; 426 } 427 token = '&'; 428 break; 429 case '^': 430 shift(); 431 if (m_current == '=') { 432 shift(); 433 token = XOREQUAL; 434 break; 435 } 436 token = '^'; 437 break; 438 case '%': 439 shift(); 440 if (m_current == '=') { 441 shift(); 442 token = MODEQUAL; 443 break; 444 } 445 token = '%'; 446 break; 447 case '|': 448 shift(); 449 if (m_current == '=') { 450 shift(); 451 token = OREQUAL; 452 break; 453 } 454 if (m_current == '|') { 455 shift(); 456 token = OR; 457 break; 458 } 459 token = '|'; 460 break; 461 case '.': 462 shift(); 463 if (isASCIIDigit(m_current)) { 464 record8('.'); 465 goto inNumberAfterDecimalPoint; 466 } 467 token = '.'; 468 break; 469 case ',': 470 case '~': 471 case '?': 472 case ':': 473 case '(': 474 case ')': 475 case '[': 476 case ']': 477 token = m_current; 478 shift(); 479 break; 480 case ';': 481 m_delimited = true; 482 shift(); 483 token = ';'; 484 break; 485 case '{': 486 lvalp->intValue = currentOffset(); 487 shift(); 488 token = OPENBRACE; 489 break; 490 case '}': 491 lvalp->intValue = currentOffset(); 492 m_delimited = true; 493 shift(); 494 token = CLOSEBRACE; 495 break; 496 case '\\': 497 goto startIdentifierWithBackslash; 498 case '0': 499 goto startNumberWithZeroDigit; 500 case '1': 501 case '2': 502 case '3': 503 case '4': 504 case '5': 505 case '6': 506 case '7': 507 case '8': 508 case '9': 509 goto startNumber; 510 case '"': 511 case '\'': 512 goto startString; 513 default: 514 if (isIdentStart(m_current)) 575 if (m_current == '*') { 576 shift(); 577 goto inMultiLineComment; 578 } 579 if (m_current == '=') { 580 shift(); 581 token = DIVEQUAL; 582 break; 583 } 584 token = '/'; 585 break; 586 case CharacterAnd: 587 shift(); 588 if (m_current == '&') { 589 shift(); 590 token = AND; 591 break; 592 } 593 if (m_current == '=') { 594 shift(); 595 token = ANDEQUAL; 596 break; 597 } 598 token = '&'; 599 break; 600 case CharacterXor: 601 shift(); 602 if (m_current == '=') { 603 shift(); 604 token = XOREQUAL; 605 break; 606 } 607 token = '^'; 608 break; 609 case CharacterModulo: 610 shift(); 611 if (m_current == '=') { 612 shift(); 613 token = MODEQUAL; 614 break; 615 } 616 token = '%'; 617 break; 618 case CharacterOr: 619 shift(); 620 if (m_current == '=') { 621 shift(); 622 token = OREQUAL; 623 break; 624 } 625 if (m_current == '|') { 626 shift(); 627 token = OR; 628 break; 629 } 630 token = '|'; 631 break; 632 case CharacterDot: 633 shift(); 634 if (isASCIIDigit(m_current)) { 635 record8('.'); 636 goto inNumberAfterDecimalPoint; 637 } 638 token = '.'; 639 break; 640 case CharacterSimple: 641 token = m_current; 642 shift(); 643 break; 644 case CharacterSemicolon: 645 m_delimited = true; 646 shift(); 647 token = ';'; 648 break; 649 case CharacterOpenBrace: 650 lvalp->intValue = currentOffset(); 651 shift(); 652 token = OPENBRACE; 653 break; 654 case CharacterCloseBrace: 655 lvalp->intValue = currentOffset(); 656 m_delimited = true; 657 shift(); 658 token = CLOSEBRACE; 659 break; 660 case CharacterBackSlash: 661 goto startIdentifierWithBackslash; 662 case CharacterZero: 663 goto startNumberWithZeroDigit; 664 case CharacterNumber: 665 goto startNumber; 666 case CharacterQuote: 667 goto startString; 668 case CharacterAlpha: 669 ASSERT(isIdentStart(m_current)); 670 goto startIdentifierOrKeyword; 671 case CharacterLineTerminator: 672 ASSERT(isLineTerminator(m_current)); 673 shiftLineTerminator(); 674 m_atLineStart = true; 675 m_terminator = true; 676 if (lastTokenWasRestrKeyword()) { 677 token = ';'; 678 goto doneSemicolon; 679 } 680 goto start; 681 case CharacterInvalid: 682 goto returnError; 683 default: 684 ASSERT_NOT_REACHED(); 685 goto returnError; 686 } 687 } else { 688 // Rare characters 689 ASSERT(!isASCII(m_current)); 690 691 if (isNonASCIIIdentStart(m_current)) 515 692 goto startIdentifierOrKeyword; 516 693 if (isLineTerminator(m_current)) {
Note:
See TracChangeset
for help on using the changeset viewer.