Ignore:
Timestamp:
Mar 3, 2016, 5:24:28 PM (9 years ago)
Author:
[email protected]
Message:

[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns). In the spec, the
grammar is annotated with [U] annotations. Productions that are prefixed with
[+U] are only available with the Unicode flag while productions prefixed with
[~U] are only available without the Unicode flag.

Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
parsing errors at syntax checking time. Restricted what escapes are available for
non-Unicode patterns. Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass-only escape in Unicode patterns.

Updated the tests for these changes.

Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
change set r197426 was landed.

  • parser/ASTBuilder.h:

(JSC::ASTBuilder::createRegExp):

  • parser/Parser.cpp:

(JSC::Parser<LexerType>::parsePrimaryExpression):

  • parser/SyntaxChecker.h:

(JSC::SyntaxChecker::createRegExp):

  • yarr/YarrInterpreter.cpp:

(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):

  • yarr/YarrParser.h:

(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):

  • yarr/YarrPattern.cpp:

(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):

  • yarr/YarrSyntaxChecker.cpp:

(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):

  • yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

  • js/regexp-unicode-expected.txt:
  • js/script-tests/regexp-unicode.js:

(shouldThrowInvalidEscape):

[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

  • js/Object-getOwnPropertyNames-expected.txt:
  • js/script-tests/Object-getOwnPropertyNames.js:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp

    r197426 r197534  
    209209            ASSERT(p < length);
    210210            int result = input[p];
    211             if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length
    212                 && U16_IS_TRAIL(input[p + 1])) {
     211            if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
    213212                if (atEnd())
    214213                    return -1;
     
    220219        }
    221220       
    222         int readSurrogatePairChecked(unsigned negativePositionOffest)
    223         {
    224             RELEASE_ASSERT(pos >= negativePositionOffest);
    225             unsigned p = pos - negativePositionOffest;
     221        int readSurrogatePairChecked(unsigned negativePositionOffset)
     222        {
     223            RELEASE_ASSERT(pos >= negativePositionOffset);
     224            unsigned p = pos - negativePositionOffset;
    226225            ASSERT(p < length);
    227226            if (p + 1 >= length)
     
    229228
    230229            int first = input[p];
    231             if (U16_IS_LEAD(first) && U16_IS_TRAIL(input[p + 1]))
    232                 return U16_GET_SUPPLEMENTARY(first, input[p + 1]);
     230            int second = input[p + 1];
     231            if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
     232                return U16_GET_SUPPLEMENTARY(first, second);
    233233
    234234            return -1;
     
    239239            ASSERT(from < length);
    240240            int result = input[from];
    241             if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length
    242                 && U16_IS_TRAIL(input[from + 1])) {
    243                
     241            if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1]))
    244242                result = U16_GET_SUPPLEMENTARY(result, input[from + 1]);
    245             }
    246243            return result;
    247244        }
     
    295292        }
    296293
    297         bool atStart(unsigned negativePositionOffest)
    298         {
    299             return pos == negativePositionOffest;
     294        bool atStart(unsigned negativePositionOffset)
     295        {
     296            return pos == negativePositionOffset;
    300297        }
    301298
     
    320317    bool testCharacterClass(CharacterClass* characterClass, int ch)
    321318    {
    322         if (ch & 0x1FFF80) {
     319        if (!isASCII(ch)) {
    323320            for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
    324321                if (ch == characterClass->m_matchesUnicode[i])
     
    434431            if (backTrack->matchAmount) {
    435432                --backTrack->matchAmount;
    436                 if (unicode && !U_IS_BMP(term.atom.patternCharacter))
    437                     input.uncheckInput(2);
    438                 else
    439                     input.uncheckInput(1);
     433                input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
    440434                return true;
    441435            }
     
    12681262        case ByteTerm::TypePatternCasedCharacterFixed: {
    12691263            if (unicode) {
    1270                 // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
     1264                // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
    12711265                ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
    12721266
     
    12911285            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
    12921286
    1293             // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
     1287            // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
    12941288            ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
    12951289
     
    13091303            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
    13101304
    1311             // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
     1305            // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
    13121306            ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
    13131307           
     
    16191613    {
    16201614        if (m_pattern.m_ignoreCase) {
    1621             ASSERT(u_tolower(ch) <= UCHAR_MAX_VALUE);
    1622             ASSERT(u_toupper(ch) <= UCHAR_MAX_VALUE);
    1623 
    16241615            UChar32 lo = u_tolower(ch);
    16251616            UChar32 hi = u_toupper(ch);
Note: See TracChangeset for help on using the changeset viewer.