Context Navigation

← Previous Change
Next Change →

YarrInterpreter.cpp

Timestamp:

Mar 3, 2016, 5:24:28 PM (9 years ago)

Author:

Message:

[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns). In the spec, the
grammar is annotated with [U] annotations. Productions that are prefixed with
[+U] are only available with the Unicode flag, while productions prefixed with
[~U] are only available without the Unicode flag.

Added a flags argument to Yarr::checkSyntax() so we can catch Unicode-flag-related
parsing errors at syntax-checking time. Restricted which escapes are available for
non-Unicode patterns. Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass-only escape in Unicode patterns.

Updated the tests for these changes.

Made the changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
changeset r197426 landed.

parser/ASTBuilder.h:

(JSC::ASTBuilder::createRegExp):

parser/Parser.cpp:

(JSC::Parser<LexerType>::parsePrimaryExpression):

parser/SyntaxChecker.h:

(JSC::SyntaxChecker::createRegExp):

yarr/YarrInterpreter.cpp:

(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):

yarr/YarrParser.h:

(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):

yarr/YarrPattern.cpp:

(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):

yarr/YarrSyntaxChecker.cpp:

(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):

yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

js/regexp-unicode-expected.txt:
js/script-tests/regexp-unicode.js:

(shouldThrowInvalidEscape):

[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

js/Object-getOwnPropertyNames-expected.txt:
js/script-tests/Object-getOwnPropertyNames.js:

File:

: 1 edited

trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp (modified) (11 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp

-              r197426
+              r197534
             ASSERT(p < length);
             int result = input[p];
+            if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length
+                && U16_IS_TRAIL(input[p + 1])) {
+            if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) {
                 if (atEnd())
                     return -1;
 …
+        }
         int readSurrogatePairChecked(unsigned negativePositionOffest)
+        {
             RELEASE_ASSERT(pos >= negativePositionOffest);
             unsigned p = pos - negativePositionOffest;
+        int readSurrogatePairChecked(unsigned negativePositionOffset)
+        {
+            RELEASE_ASSERT(pos >= negativePositionOffset);
+            unsigned p = pos - negativePositionOffset;
             ASSERT(p < length);
             if (p + 1 >= length)
 …
             int first = input[p];
+            if (U16_IS_LEAD(first) && U16_IS_TRAIL(input[p + 1]))
+                return U16_GET_SUPPLEMENTARY(first, input[p + 1]);
+            int second = input[p + 1];
+            if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
+                return U16_GET_SUPPLEMENTARY(first, second);
             return -1;
 …
             ASSERT(from < length);
             int result = input[from];
+            if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length
+                && U16_IS_TRAIL(input[from + 1])) {
+            if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + 1 < length && U16_IS_TRAIL(input[from + 1]))
                 result = U16_GET_SUPPLEMENTARY(result, input[from + 1]);
+            }
             return result;
+        }
 …
+        }
         bool atStart(unsigned negativePositionOffest)
+        {
             return pos == negativePositionOffest;
+        bool atStart(unsigned negativePositionOffset)
+        {
+            return pos == negativePositionOffset;
+        }
 …
     bool testCharacterClass(CharacterClass* characterClass, int ch)
+    {
         if (ch & 0x1FFF80) {
+        if (!isASCII(ch)) {
             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
                 if (ch == characterClass->m_matchesUnicode[i])
 …
             if (backTrack->matchAmount) {
                 --backTrack->matchAmount;
+                if (unicode && !U_IS_BMP(term.atom.patternCharacter))
+                    input.uncheckInput(2);
+                else
+                    input.uncheckInput(1);
+                input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
                 return true;
+            }
 …
         case ByteTerm::TypePatternCasedCharacterFixed: {
             if (unicode) {
                 // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
+                // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
                 ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
 …
             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
             // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
+            // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
 …
             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
             // Case insensitive matching of unicode charaters are handled as TypeCharacterClass
+            // Case insensitive matching of unicode characters is handled as TypeCharacterClass.
             ASSERT(!unicode || U_IS_BMP(currentTerm().atom.patternCharacter));
 …
+    {
         if (m_pattern.m_ignoreCase) {
-            ASSERT(u_tolower(ch) <= UCHAR_MAX_VALUE);
-            ASSERT(u_toupper(ch) <= UCHAR_MAX_VALUE);
             UChar32 lo = u_tolower(ch);
             UChar32 hi = u_toupper(ch);

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 197534 in webkit for trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp

Legend:

trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp

Download in other formats: