Changeset 183552 in webkit for trunk/Source/JavaScriptCore/parser/Lexer.cpp
- Timestamp: Apr 29, 2015, 9:33:12 AM (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/parser/Lexer.cpp
r183373 r183552 611 611 } 612 612 613 template <typename T> 614 typename Lexer<T>::UnicodeHexValue Lexer<T>::parseFourDigitUnicodeHex() 615 { 616 T char1 = peek(1); 617 T char2 = peek(2); 618 T char3 = peek(3); 619 620 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3))) 621 return UnicodeHexValue((m_code + 4) >= m_codeEnd ? UnicodeHexValue::IncompleteHex : UnicodeHexValue::InvalidHex); 622 623 int result = convertUnicode(m_current, char1, char2, char3); 613 struct ParsedUnicodeEscapeValue { 614 ParsedUnicodeEscapeValue(UChar32 value) 615 : m_value(value) 616 { 617 ASSERT(isValid()); 618 } 619 620 enum SpecialValueType { Incomplete = -2, Invalid = -1 }; 621 ParsedUnicodeEscapeValue(SpecialValueType type) 622 : m_value(type) 623 { 624 } 625 626 bool isValid() const { return m_value >= 0; } 627 bool isIncomplete() const { return m_value == Incomplete; } 628 629 UChar32 value() const 630 { 631 ASSERT(isValid()); 632 return m_value; 633 } 634 635 private: 636 UChar32 m_value; 637 }; 638 639 template<typename CharacterType> ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape() 640 { 641 if (m_current == '{') { 642 shift(); 643 UChar32 codePoint = 0; 644 do { 645 if (!isASCIIHexDigit(m_current)) 646 return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete; 647 codePoint = (codePoint << 4) | toASCIIHexValue(m_current); 648 if (codePoint > UCHAR_MAX_VALUE) 649 return ParsedUnicodeEscapeValue::Invalid; 650 shift(); 651 } while (m_current != '}'); 652 shift(); 653 return codePoint; 654 } 655 656 auto character2 = peek(1); 657 auto character3 = peek(2); 658 auto character4 = peek(3); 659 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) 660 return (m_code + 4) >= m_codeEnd ? 
ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid; 661 auto result = convertUnicode(m_current, character2, character3, character4); 624 662 shift(); 625 663 shift(); 626 664 shift(); 627 665 shift(); 628 return UnicodeHexValue(result); 666 return result; 629 667 } 630 668 … … 666 704 } 667 705 706 static ALWAYS_INLINE bool isLatin1(UChar32 c) 707 { 708 return !(c & ~0xFF); 709 } 710 668 711 static inline bool isIdentStart(LChar c) 669 712 { … … 671 714 } 672 715 673 static inline bool isIdentStart(UChar c) 716 static inline bool isIdentStart(UChar32 c) 674 717 { 675 718 return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c); 676 719 } 677 720 678 static NEVER_INLINE bool isNonLatin1IdentPart(int c) 679 { 721 static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c) 722 { 723 // FIXME: ES6 says this should be based on the Unicode property ID_Continue now instead. 680 724 return (U_GET_GC_MASK(c) & (U_GC_L_MASK | U_GC_MN_MASK | U_GC_MC_MASK | U_GC_ND_MASK | U_GC_PC_MASK)) || c == 0x200C || c == 0x200D; 681 725 } … … 689 733 } 690 734 735 static ALWAYS_INLINE bool isIdentPart(UChar32 c) 736 { 737 return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c); 738 } 739 691 740 static ALWAYS_INLINE bool isIdentPart(UChar c) 692 741 { 693 return isLatin1(c) ? 
isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c); 694 } 695 696 template <typename T> 697 bool isUnicodeEscapeIdentPart(const T* code) 698 { 699 T char1 = code[0]; 700 T char2 = code[1]; 701 T char3 = code[2]; 702 T char4 = code[3]; 703 704 if (!isASCIIHexDigit(char1) || !isASCIIHexDigit(char2) || !isASCIIHexDigit(char3) || !isASCIIHexDigit(char4)) 742 return isIdentPart(static_cast<UChar32>(c)); 743 } 744 745 template<typename CharacterType> ALWAYS_INLINE bool isIdentPartIncludingEscapeTemplate(const CharacterType* code, const CharacterType* codeEnd) 746 { 747 if (isIdentPart(code[0])) 748 return true; 749 750 // Shortest sequence handled below is \u{0}, which is 5 characters. 751 if (!(code[0] == '\\' && codeEnd - code >= 5 && code[1] == 'u')) 705 752 return false; 706 707 return isIdentPart(Lexer<T>::convertUnicode(char1, char2, char3, char4)); 753 754 if (code[2] == '{') { 755 UChar32 codePoint = 0; 756 const CharacterType* pointer; 757 for (pointer = &code[3]; pointer < codeEnd; ++pointer) { 758 auto digit = *pointer; 759 if (!isASCIIHexDigit(digit)) 760 break; 761 codePoint = (codePoint << 4) | toASCIIHexValue(digit); 762 if (codePoint > UCHAR_MAX_VALUE) 763 return false; 764 } 765 return isIdentPart(codePoint) && pointer < codeEnd && *pointer == '}'; 766 } 767 768 // Shortest sequence handled below is \uXXXX, which is 6 characters. 
769 if (codeEnd - code < 6) 770 return false; 771 772 auto character1 = code[2]; 773 auto character2 = code[3]; 774 auto character3 = code[4]; 775 auto character4 = code[5]; 776 return isASCIIHexDigit(character1) && isASCIIHexDigit(character2) && isASCIIHexDigit(character3) && isASCIIHexDigit(character4) 777 && isIdentPart(Lexer<LChar>::convertUnicode(character1, character2, character3, character4)); 708 778 } 709 779 710 780 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const LChar* code, const LChar* codeEnd) 711 781 { 712 if (isIdentPart(*code)) 713 return true; 714 715 return (*code == '\\' && ((codeEnd - code) >= 6) && code[1] == 'u' && isUnicodeEscapeIdentPart(code+2)); 782 return isIdentPartIncludingEscapeTemplate(code, codeEnd); 716 783 } 717 784 718 785 static ALWAYS_INLINE bool isIdentPartIncludingEscape(const UChar* code, const UChar* codeEnd) 719 786 { 720 if (isIdentPart(*code)) 721 return true; 722 723 return (*code == '\\' && ((codeEnd - code) >= 6) && code[1] == 'u' && isUnicodeEscapeIdentPart(code+2)); 787 return isIdentPartIncludingEscapeTemplate(code, codeEnd); 724 788 } 725 789 … … 800 864 } 801 865 866 template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint) 867 { 868 ASSERT(codePoint >= 0); 869 ASSERT(codePoint <= UCHAR_MAX_VALUE); 870 if (U_IS_BMP(codePoint)) 871 record16(codePoint); 872 else { 873 UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) }; 874 append16(codeUnits, 2); 875 } 876 } 877 802 878 #if !ASSERT_DISABLED 803 879 bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident) … … 808 884 * be used as a safety net while implementing builtins. 809 885 */ 886 // FIXME: How can a debug-only assertion be a safety net? 
810 887 if (*ident == vm.propertyNames->builtinNames().callPublicName()) 811 888 return false; … … 961 1038 } 962 1039 963 template <typename T> 964 template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode) 1040 template<typename CharacterType> template<bool shouldCreateIdentifier> JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode) 965 1041 { 966 1042 const ptrdiff_t remaining = m_codeEnd - m_code; 967 const T* identifierStart = currentSourcePtr(); 1043 auto identifierStart = currentSourcePtr(); 968 1044 bool bufferRequired = false; 969 1045 … … 984 1060 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK; 985 1061 shift(); 986 UnicodeHexValue character = parseFourDigitUnicodeHex(); 1062 auto character = parseUnicodeEscape(); 987 1063 if (UNLIKELY(!character.isValid())) 988 return character.valueType() == UnicodeHexValue::IncompleteHex ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK; 989 UChar ucharacter = static_cast<UChar>(character.value()); 990 if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter))) 1064 return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK; 1065 if (UNLIKELY(m_buffer16.size() ? 
!isIdentPart(character.value()) : !isIdentStart(character.value()))) 991 1066 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK; 992 1067 if (shouldCreateIdentifier) 993 record16(ucharacter); 1068 recordUnicodeCodePoint(character.value()); 994 1069 identifierStart = currentSourcePtr(); 995 1070 } 996 1071 997 1072 int identifierLength; 998 const Identifier* ident = 0; 1073 const Identifier* ident = nullptr; 999 1074 if (shouldCreateIdentifier) { 1000 1075 if (!bufferRequired) { … … 1009 1084 tokenData->ident = ident; 1010 1085 } else 1011 tokenData->ident = 0; 1086 tokenData->ident = nullptr; 1012 1087 1013 1088 if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) { … … 1126 1201 if (m_current == 'u') { 1127 1202 shift(); 1128 UnicodeHexValue character = parseFourDigitUnicodeHex(); 1129 if (character.isValid()) { 1130 if (shouldBuildStrings) 1131 record16(character.value()); 1132 return StringParsedSuccessfully; 1133 } 1134 1135 1204 if (escapeParseMode == EscapeParseMode::String && m_current == stringQuoteCharacter) { … … 1139 1208 } 1140 1209 1210 auto character = parseUnicodeEscape(); 1211 if (character.isValid()) { 1212 if (shouldBuildStrings) 1213 recordUnicodeCodePoint(character.value()); 1214 return StringParsedSuccessfully; 1215 } 1216 1141 1217 m_lexErrorMessage = ASCIILiteral("\\u can only be followed by a Unicode character sequence"); 1142 return character.valueType() == UnicodeHexValue::IncompleteHex ? StringUnterminated : StringCannotBeParsed; 1218 return character.isIncomplete() ? StringUnterminated : StringCannotBeParsed; 1143 1219 } 1144 1220
Note: See TracChangeset for help on using the changeset viewer.