Changeset 99436 in webkit for trunk/Source/JavaScriptCore/parser/Lexer.cpp
- Timestamp:
- Nov 7, 2011, 9:54:15 AM (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/parser/Lexer.cpp
r98887 r99436 227 227 }; 228 228 229 Lexer::Lexer(JSGlobalData* globalData) 229 template <typename T> 230 Lexer<T>::Lexer(JSGlobalData* globalData) 230 231 : m_isReparsing(false) 231 232 , m_globalData(globalData) … … 233 234 } 234 235 235 Lexer::~Lexer() 236 { 237 } 238 239 UString Lexer::getInvalidCharMessage() 236 template <typename T> 237 Lexer<T>::~Lexer() 238 { 239 } 240 241 template <typename T> 242 UString Lexer<T>::getInvalidCharMessage() 240 243 { 241 244 switch (m_current) { … … 259 262 } 260 263 261 ALWAYS_INLINE const UChar* Lexer::currentCharacter() const 264 template <typename T> 265 ALWAYS_INLINE const T* Lexer<T>::currentCharacter() const 262 266 { 263 267 ASSERT(m_code <= m_codeEnd); … … 265 269 } 266 270 267 ALWAYS_INLINE int Lexer::currentOffset() const 268 { 269 return currentCharacter() - m_codeStart; 270 } 271 272 void Lexer::setCode(const SourceCode& source, ParserArena* arena) 271 template <typename T> 272 void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena) 273 273 { 274 274 m_arena = &arena->identifierArena(); 275 275 276 276 m_lineNumber = source.firstLine(); 277 277 m_delimited = false; 278 278 m_lastToken = -1; 279 280 const UChar* data = source.provider()->data(); 279 280 const StringImpl* sourceString = source.provider()->stringData(); 281 282 if (sourceString) 283 setCodeStart(sourceString); 284 else 285 m_codeStart = 0; 281 286 282 287 m_source = &source; 283 m_codeStart = data; 284 m_code = data + source.startOffset(); 285 m_codeEnd = data + source.endOffset(); 288 m_code = m_codeStart + source.startOffset(); 289 m_codeEnd = m_codeStart + source.endOffset(); 286 290 m_error = false; 287 291 m_atLineStart = true; 288 292 m_lexErrorMessage = UString(); 289 293 290 294 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity); 291 295 m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2); 292 296 293 297 if (LIKELY(m_code < m_codeEnd)) 294 298 m_current = *m_code; … … 298 302 } 299 303 300 template <int shiftAmount, Lexer::ShiftType shouldBoundsCheck> ALWAYS_INLINE void Lexer::internalShift() 301 { 302 if (shouldBoundsCheck == DoBoundsCheck) { 303 // Faster than an if-else sequence 304 ASSERT(m_current != -1); 305 m_current = -1; 306 m_code += shiftAmount; 307 if (LIKELY(m_code < m_codeEnd)) 308 m_current = *m_code; 309 } else { 310 m_code += shiftAmount; 304 template <typename T> 305 template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift() 306 { 307 m_code += shiftAmount; 308 m_current = *m_code; 309 } 310 311 template <typename T> 312 ALWAYS_INLINE void Lexer<T>::shift() 313 { 314 // Faster than an if-else sequence 315 ASSERT(m_current != -1); 316 m_current = -1; 317 m_code++; 318 if (LIKELY(m_code < m_codeEnd)) 311 319 m_current = *m_code; 312 } 313 } 314 315 ALWAYS_INLINE void Lexer::shift() 316 { 317 internalShift<1, DoBoundsCheck>(); 318 } 319 320 ALWAYS_INLINE int Lexer::peek(int offset) 320 } 321 322 template <typename T> 323 ALWAYS_INLINE int Lexer<T>::peek(int offset) 321 324 { 322 325 // Only use if necessary 323 326 ASSERT(offset > 0 && offset < 5); 324 const UChar* code = m_code + offset;327 const T* code = m_code + offset; 325 328 return (code < m_codeEnd) ? *code : -1; 326 329 } 327 330 328 int Lexer::getUnicodeCharacter() 331 template <typename T> 332 int Lexer<T>::getUnicodeCharacter() 329 333 { 330 334 int char1 = peek(1); … … 343 347 } 344 348 345 void Lexer::shiftLineTerminator() 349 template <typename T> 350 void Lexer<T>::shiftLineTerminator() 346 351 { 347 352 ASSERT(isLineTerminator(m_current)); … … 357 362 } 358 363 359 ALWAYS_INLINE bool Lexer::lastTokenWasRestrKeyword() const 364 template <typename T> 365 ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const 360 366 { 361 367 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; … … 412 418 } 413 419 414 inline void Lexer::record8(int c) 420 template <typename T> 421 inline void Lexer<T>::record8(int c) 415 422 { 416 423 ASSERT(c >= 0); 417 424 ASSERT(c <= 0xFF); 418 m_buffer8.append(static_cast<char>(c)); 419 } 420 421 inline void Lexer::record16(UChar c) 425 m_buffer8.append(static_cast<LChar>(c)); 426 } 427 428 template <typename T> 429 inline void Lexer<T>::append8(const T* p, size_t length) 430 { 431 // FIXME: Change three occurrances of m_buffer16 to m_buffer8 and 432 // UChar to LChar when 8 bit strings are turned on. 433 size_t currentSize = m_buffer16.size(); 434 m_buffer16.grow(currentSize + length); 435 UChar* rawBuffer = m_buffer16.data() + currentSize; 436 437 for (size_t i = 0; i < length; i++) { 438 T c = p[i]; 439 ASSERT(c >= 0); 440 ASSERT(c <= 0xFF); 441 rawBuffer[i] = c; 442 } 443 } 444 445 template <typename T> 446 inline void Lexer<T>::append16(const LChar* p, size_t length) 447 { 448 size_t currentSize = m_buffer16.size(); 449 m_buffer16.grow(currentSize + length); 450 UChar* rawBuffer = m_buffer16.data() + currentSize; 451 452 for (size_t i = 0; i < length; i++) 453 rawBuffer[i] = p[i]; 454 } 455 456 template <typename T> 457 inline void Lexer<T>::record16(T c) 422 458 { 423 459 m_buffer16.append(c); 424 460 } 425 461 426 inline void Lexer::record16(int c) 462 template <typename T> 463 inline void Lexer<T>::record16(int c) 427 464 { 428 465 ASSERT(c >= 0); 429 466 ASSERT(c <= USHRT_MAX); 430 record16(UChar(static_cast<unsigned short>(c))); 431 } 432 433 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* tokenData, unsigned lexType, bool strictMode) 467 m_buffer16.append(static_cast<UChar>(c)); 468 } 469 470 template <> 471 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode) 434 472 { 435 473 const ptrdiff_t remaining = m_codeEnd - m_code; 436 if ((remaining >= maxTokenLength) && !(lex Type &IgnoreReservedWords)) {474 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) { 437 475 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData); 438 476 if (keyword != IDENT) { … … 441 479 } 442 480 } 481 482 const LChar* identifierStart = currentCharacter(); 483 484 while (isIdentPart(m_current)) 485 shift(); 486 487 if (UNLIKELY(m_current == '\\')) { 488 setOffsetFromCharOffset(identifierStart); 489 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode); 490 } 491 492 const Identifier* ident = 0; 493 494 if (shouldCreateIdentifier) { 495 int identifierLength = currentCharacter() - identifierStart; 496 ident = makeIdentifier(identifierStart, identifierLength); 497 498 tokenData->ident = ident; 499 } else 500 tokenData->ident = 0; 501 502 m_delimited = false; 503 504 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) { 505 ASSERT(shouldCreateIdentifier); 506 if (remaining < maxTokenLength) { 507 const HashEntry* entry = m_globalData->keywords->getKeyword(*ident); 508 ASSERT((remaining < maxTokenLength) || !entry); 509 if (!entry) 510 return IDENT; 511 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue()); 512 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT; 513 } 514 return IDENT; 515 } 516 517 return IDENT; 518 } 519 520 template <> 521 template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode) 522 { 523 const ptrdiff_t remaining = m_codeEnd - m_code; 524 if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) { 525 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData); 526 if (keyword != IDENT) { 527 ASSERT((!shouldCreateIdentifier) || tokenData->ident); 528 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword; 529 } 530 } 443 531 const UChar* identifierStart = currentCharacter(); 532 533 UChar orAllChars = 0; 534 535 while (isIdentPart(m_current)) { 536 orAllChars |= m_current; 537 shift(); 538 } 539 540 if (UNLIKELY(m_current == '\\')) { 541 setOffsetFromCharOffset(identifierStart); 542 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode); 543 } 544 545 bool isAll8Bit = false; 546 547 #if 0 // FIXME: Remove this #if when 8 bit strings are turned on. 548 if (!(orAllChars & ~0xff)) 549 isAll8Bit = true; 550 #endif 551 552 const Identifier* ident = 0; 553 554 if (shouldCreateIdentifier) { 555 int identifierLength = currentCharacter() - identifierStart; 556 if (isAll8Bit) 557 ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength); 558 else 559 ident = makeIdentifier(identifierStart, identifierLength); 560 561 tokenData->ident = ident; 562 } else 563 tokenData->ident = 0; 564 565 m_delimited = false; 566 567 if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) { 568 ASSERT(shouldCreateIdentifier); 569 if (remaining < maxTokenLength) { 570 const HashEntry* entry = m_globalData->keywords->getKeyword(*ident); 571 ASSERT((remaining < maxTokenLength) || !entry); 572 if (!entry) 573 return IDENT; 574 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue()); 575 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT; 576 } 577 return IDENT; 578 } 579 580 return IDENT; 581 } 582 583 template <typename T> 584 template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode) 585 { 586 const ptrdiff_t remaining = m_codeEnd - m_code; 587 const T* identifierStart = currentCharacter(); 444 588 bool bufferRequired = false; 445 589 … … 465 609 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character))) 466 610 return ERRORTOK; 467 if 611 if (shouldCreateIdentifier) 468 612 record16(character); 469 613 identifierStart = currentCharacter(); 470 614 } 471 615 472 616 int identifierLength; 473 617 const Identifier* ident = 0; 474 618 if (shouldCreateIdentifier) { 475 if (!bufferRequired) 619 if (!bufferRequired) { 476 620 identifierLength = currentCharacter() - identifierStart; 477 else { 621 ident = makeIdentifier(identifierStart, identifierLength); 622 } else { 478 623 if (identifierStart != currentCharacter()) 479 624 m_buffer16.append(identifierStart, currentCharacter() - identifierStart); 480 identifierStart = m_buffer16.data(); 481 identifierLength = m_buffer16.size(); 482 } 483 484 ident = makeIdentifier(identifierStart, identifierLength); 625 ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 626 } 627 485 628 tokenData->ident = ident; 486 629 } else … … 489 632 m_delimited = false; 490 633 491 if (LIKELY(!bufferRequired && !(lex Type &IgnoreReservedWords))) {634 if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) { 492 635 ASSERT(shouldCreateIdentifier); 493 636 // Keywords must not be recognized if there was an \uXXXX in the identifier. … … 507 650 } 508 651 509 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer::parseString(JSTokenData* tokenData, bool strictMode) 510 { 652 template <typename T> 653 template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode) 654 { 655 // FIXME: Change record16 and m_buffer16 to record8 and m_buffer8 below when 656 // 8 bit strings are turned on. 657 int startingOffset = currentOffset(); 658 int startingLineNumber = lineNumber(); 511 659 int stringQuoteCharacter = m_current; 512 660 shift(); 513 661 514 const UChar* stringStart = currentCharacter(); 662 const T* stringStart = currentCharacter(); 663 664 while (m_current != stringQuoteCharacter) { 665 if (UNLIKELY((m_current == '\\'))) { 666 if (stringStart != currentCharacter() && shouldBuildStrings) 667 append8(stringStart, currentCharacter() - stringStart); 668 shift(); 669 670 int escape = singleEscape(m_current); 671 672 // Most common escape sequences first 673 if (escape) { 674 if (shouldBuildStrings) 675 record16(escape); // FIXME: Change to record8 676 shift(); 677 } else if (UNLIKELY(isLineTerminator(m_current))) 678 shiftLineTerminator(); 679 else if (m_current == 'x') { 680 shift(); 681 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) { 682 int prev = m_current; 683 shift(); 684 if (shouldBuildStrings) 685 record16(convertHex(prev, m_current)); // FIXME: Change to record8 686 shift(); 687 } else if (shouldBuildStrings) 688 record16('x'); // FIXME: Change to record8 689 } else { 690 setOffset(startingOffset); 691 setLineNumber(startingLineNumber); 692 m_buffer16.resize(0); // FIXME: Change to m_buffer8 693 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode); 694 } 695 stringStart = currentCharacter(); 696 continue; 697 } 698 699 if (UNLIKELY(((m_current > 0xff) || (m_current < 0xe)))) { 700 setOffset(startingOffset); 701 setLineNumber(startingLineNumber); 702 m_buffer16.resize(0); // FIXME: Change to m_buffer8 703 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode); 704 } 705 706 shift(); 707 } 708 709 if (currentCharacter() != stringStart && shouldBuildStrings) 710 append8(stringStart, currentCharacter() - stringStart); 711 if (shouldBuildStrings) { 712 // FIXME: Change to m_buffer8 713 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); 714 // FIXME: Change to m_buffer8 715 m_buffer16.resize(0); 716 } else 717 tokenData->ident = 0; 718 719 return true; 720 } 721 722 template <typename T> 723 template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) 724 { 725 int stringQuoteCharacter = m_current; 726 shift(); 727 728 const T* stringStart = currentCharacter(); 515 729 516 730 while (m_current != stringQuoteCharacter) { 517 731 if (UNLIKELY(m_current == '\\')) { 518 732 if (stringStart != currentCharacter() && shouldBuildStrings) 519 m_buffer16.append(stringStart, currentCharacter() - stringStart);733 append16(stringStart, currentCharacter() - stringStart); 520 734 shift(); 521 735 … … 524 738 // Most common escape sequences first 525 739 if (escape) { 526 527 740 if (shouldBuildStrings) 741 record16(escape); 528 742 shift(); 529 743 } else if (UNLIKELY(isLineTerminator(m_current))) … … 609 823 610 824 if (currentCharacter() != stringStart && shouldBuildStrings) 611 m_buffer16.append(stringStart, currentCharacter() - stringStart);825 append16(stringStart, currentCharacter() - stringStart); 612 826 if (shouldBuildStrings) 613 827 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); … … 619 833 } 620 834 621 ALWAYS_INLINE void Lexer::parseHex(double& returnValue) 835 template <typename T> 836 ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue) 622 837 { 623 838 // Optimization: most hexadecimal values fit into 4 bytes. … … 658 873 } 659 874 660 ALWAYS_INLINE bool Lexer::parseOctal(double& returnValue) 875 template <typename T> 876 ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue) 661 877 { 662 878 // Optimization: most octal values fit into 4 bytes. … … 665 881 // Temporary buffer for the digits. Makes easier 666 882 // to reconstruct the input characters when needed. 667 char digits[10];883 LChar digits[10]; 668 884 669 885 do { … … 694 910 } 695 911 696 ALWAYS_INLINE bool Lexer::parseDecimal(double& returnValue) 912 template <typename T> 913 ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue) 697 914 { 698 915 // Optimization: most decimal values fit into 4 bytes. … … 705 922 // Temporary buffer for the digits. Makes easier 706 923 // to reconstruct the input characters when needed. 707 char digits[10];924 LChar digits[10]; 708 925 709 926 do { … … 731 948 } 732 949 733 ALWAYS_INLINE void Lexer::parseNumberAfterDecimalPoint() 950 template <typename T> 951 ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint() 734 952 { 735 953 record8('.'); … … 740 958 } 741 959 742 ALWAYS_INLINE bool Lexer::parseNumberAfterExponentIndicator() 960 template <typename T> 961 ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator() 743 962 { 744 963 record8('e'); … … 759 978 } 760 979 761 ALWAYS_INLINE bool Lexer::parseMultilineComment() 980 template <typename T> 981 ALWAYS_INLINE bool Lexer<T>::parseMultilineComment() 762 982 { 763 983 while (true) { … … 781 1001 } 782 1002 783 bool Lexer::nextTokenIsColon() 784 { 785 const UChar* code = m_code; 1003 template <typename T> 1004 bool Lexer<T>::nextTokenIsColon() 1005 { 1006 const T* code = m_code; 786 1007 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code))) 787 1008 code++; 788 1009 789 1010 return code < m_codeEnd && *code == ':'; 790 1011 } 791 1012 792 JSTokenType Lexer::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexType, bool strictMode) 1013 template <typename T> 1014 JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexerFlags, bool strictMode) 793 1015 { 794 1016 ASSERT(!m_error); … … 1096 1318 // Null-terminate string for strtod. 1097 1319 m_buffer8.append('\0'); 1098 tokenData->doubleValue = WTF::strtod( m_buffer8.data(), 0);1320 tokenData->doubleValue = WTF::strtod(reinterpret_cast<const char*>(m_buffer8.data()), 0); 1099 1321 } 1100 1322 token = NUMBER; … … 1110 1332 break; 1111 1333 case CharacterQuote: 1112 if (lex Type &DontBuildStrings) {1334 if (lexerFlags & LexerFlagsDontBuildStrings) { 1113 1335 if (UNLIKELY(!parseString<false>(tokenData, strictMode))) 1114 1336 goto returnError; … … 1125 1347 // Fall through into CharacterBackSlash. 1126 1348 case CharacterBackSlash: 1127 if (lex Type &DontBuildKeywords)1128 token = parseIdentifier<false>(tokenData, lex Type, strictMode);1349 if (lexerFlags & LexexFlagsDontBuildKeywords) 1350 token = parseIdentifier<false>(tokenData, lexerFlags, strictMode); 1129 1351 else 1130 token = parseIdentifier<true>(tokenData, lex Type, strictMode);1352 token = parseIdentifier<true>(tokenData, lexerFlags, strictMode); 1131 1353 break; 1132 1354 case CharacterLineTerminator: … … 1179 1401 } 1180 1402 1181 bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix) 1403 template <typename T> 1404 bool Lexer<T>::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix) 1182 1405 { 1183 1406 ASSERT(m_buffer16.isEmpty()); … … 1240 1463 } 1241 1464 1242 bool Lexer::skipRegExp() 1465 template <typename T> 1466 bool Lexer<T>::skipRegExp() 1243 1467 { 1244 1468 bool lastWasEscape = false; … … 1280 1504 } 1281 1505 1282 void Lexer::clear() 1506 template <typename T> 1507 void Lexer<T>::clear() 1283 1508 { 1284 1509 m_arena = 0; 1285 1510 1286 Vector< char> newBuffer8;1511 Vector<LChar> newBuffer8; 1287 1512 m_buffer8.swap(newBuffer8); 1288 1513 … … 1293 1518 } 1294 1519 1295 SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) 1520 template <typename T> 1521 SourceCode Lexer<T>::sourceCode(int openBrace, int closeBrace, int firstLine) 1296 1522 { 1297 1523 ASSERT(m_source->provider()->data()[openBrace] == '{'); … … 1300 1526 } 1301 1527 1528 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h 1529 template class Lexer<LChar>; 1530 template class Lexer<UChar>; 1531 1302 1532 } // namespace JSC
Note:
See TracChangeset
for help on using the changeset viewer.