Context Navigation

← Previous Change
Next Change →

Lexer.cpp

Timestamp:

May 22, 2019, 12:06:03 PM (6 years ago)

Author:

Ross Kirsling

Message:

[ESNext] Implement support for Numeric Separators
https://bugs.webkit.org/show_bug.cgi?id=196351

Reviewed by Keith Miller.

JSTests:

stress/numeric-literal-separators.js: Added.

Add tests for the numeric separator feature.

test262/expectations.yaml:

Mark 60 test cases as passing.

Source/JavaScriptCore:

Implement the following proposal, which is now Stage 3:

https://github.com/tc39/proposal-numeric-separator

Specifically, this allows _ to be used as a separator in numeric literals.
It may be inserted arbitrarily without semantic effect, but it may not occur:

- multiple times in a row
- at the beginning or end of the literal
- adjacent to 0x, 0b, 0o, ., e, or n
- after a leading zero (e.g. 0_123), even in sloppy mode

parser/Lexer.cpp:

(JSC::isASCIIDigitOrSeparator): Added.
(JSC::isASCIIHexDigitOrSeparator): Added.
(JSC::isASCIIBinaryDigitOrSeparator): Added.
(JSC::isASCIIOctalDigitOrSeparator): Added.
(JSC::Lexer<T>::parseHex):
(JSC::Lexer<T>::parseBinary):
(JSC::Lexer<T>::parseOctal):
(JSC::Lexer<T>::parseDecimal):
(JSC::Lexer<T>::parseNumberAfterDecimalPoint):
(JSC::Lexer<T>::parseNumberAfterExponentIndicator):
(JSC::Lexer<T>::lexWithoutClearingLineTerminator):

parser/Lexer.h:

File:

: 1 edited

trunk/Source/JavaScriptCore/parser/Lexer.cpp (modified) (21 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/Source/JavaScriptCore/parser/Lexer.cpp

-              r244038
+              r245634
+}
+template<typename CharacterType>
+static inline bool isASCIIDigitOrSeparator(CharacterType character) // Decimal-digit test that additionally accepts the '_' numeric separator.
+{
+    return isASCIIDigit(character) || character == '_'; // Only classifies the character; this helper does not check whether the separator's position is legal.
+}
+template<typename CharacterType>
+static inline bool isASCIIHexDigitOrSeparator(CharacterType character) // Hexadecimal-digit test that additionally accepts the '_' numeric separator.
+{
+    return isASCIIHexDigit(character) || character == '_'; // Only classifies the character; this helper does not check whether the separator's position is legal.
+}
+template<typename CharacterType>
+static inline bool isASCIIBinaryDigitOrSeparator(CharacterType character) // Binary-digit test that additionally accepts the '_' numeric separator.
+{
+    return isASCIIBinaryDigit(character) || character == '_'; // Only classifies the character; this helper does not check whether the separator's position is legal.
+}
+template<typename CharacterType>
+static inline bool isASCIIOctalDigitOrSeparator(CharacterType character) // Octal-digit test that additionally accepts the '_' numeric separator.
+{
+    return isASCIIOctalDigit(character) || character == '_'; // Only classifies the character; this helper does not check whether the separator's position is legal.
+}
 static inline LChar singleEscape(int c)
+{
 …
 template <typename T>
+ALWAYS_INLINE auto Lexer<T>::parseHex() -> NumberParseResult
+{
+ALWAYS_INLINE auto Lexer<T>::parseHex() -> Optional<NumberParseResult>
+{
+    ASSERT(isASCIIHexDigit(m_current));
     // Optimization: most hexadecimal values fit into 4 bytes.
     uint32_t hexValue = 0;
 …
     do {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIHexDigit(peek(1))))
+                return WTF::nullopt;
+            shift();
+        }
         hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
         shift();
         --maximumDigits;
     } while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
+    } while (isASCIIHexDigitOrSeparator(m_current) && maximumDigits >= 0);
     if (LIKELY(maximumDigits >= 0 && m_current != 'n'))
         return hexValue;
+        return NumberParseResult { hexValue };
     // No more place in the hexValue buffer.
 …
+    }
+    while (isASCIIHexDigit(m_current)) {
+    while (isASCIIHexDigitOrSeparator(m_current)) {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIHexDigit(peek(1))))
+                return WTF::nullopt;
+            shift();
+        }
         record8(m_current);
         shift();
 …
     if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
         return makeIdentifier(m_buffer8.data(), m_buffer8.size());
+        return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
     return parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
+    return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16) };
+}
 …
 ALWAYS_INLINE auto Lexer<T>::parseBinary() -> Optional<NumberParseResult>
+{
+    ASSERT(isASCIIBinaryDigit(m_current));
     // Optimization: most binary values fit into 4 bytes.
     uint32_t binaryValue = 0;
 …
     do {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
+                return WTF::nullopt;
+            shift();
+        }
         binaryValue = (binaryValue << 1) + (m_current - '0');
         digits[digit] = m_current;
         shift();
         --digit;
     } while (isASCIIBinaryDigit(m_current) && digit >= 0);
     if (LIKELY(!isASCIIDigit(m_current) && digit >= 0 && m_current != 'n'))
         return Variant<double, const Identifier*> { binaryValue };
+    } while (isASCIIBinaryDigitOrSeparator(m_current) && digit >= 0);
+    if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
+        return NumberParseResult { binaryValue };
     for (int i = maximumDigits - 1; i > digit; --i)
         record8(digits[i]);
+    while (isASCIIBinaryDigit(m_current)) {
+    while (isASCIIBinaryDigitOrSeparator(m_current)) {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
+                return WTF::nullopt;
+            shift();
+        }
         record8(m_current);
         shift();
 …
     if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
         return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
+        return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
     if (isASCIIDigit(m_current))
         return WTF::nullopt;
     return Variant<double, const Identifier*> { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
+    return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
+}
 …
 ALWAYS_INLINE auto Lexer<T>::parseOctal() -> Optional<NumberParseResult>
+{
+    ASSERT(isASCIIOctalDigit(m_current));
     // Optimization: most octal values fit into 4 bytes.
     uint32_t octalValue = 0;
 …
     do {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIOctalDigit(peek(1))))
+                return WTF::nullopt;
+            shift();
+        }
         octalValue = octalValue * 8 + (m_current - '0');
         digits[digit] = m_current;
         shift();
         --digit;
+    } while (isASCIIOctalDigit(m_current) && digit >= 0);
+    if (LIKELY(!isASCIIDigit(m_current) && digit >= 0 && m_current != 'n'))
+        return Variant<double, const Identifier*> { octalValue };
+    } while (isASCIIOctalDigitOrSeparator(m_current) && digit >= 0);
+    if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
+        return NumberParseResult { octalValue };
     for (int i = maximumDigits - 1; i > digit; --i)
          record8(digits[i]);
+    while (isASCIIOctalDigit(m_current)) {
+    while (isASCIIOctalDigitOrSeparator(m_current)) {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIOctalDigit(peek(1))))
+                return WTF::nullopt;
+            shift();
+        }
         record8(m_current);
         shift();
 …
     if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
         return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
+        return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
     if (isASCIIDigit(m_current))
         return WTF::nullopt;
     return Variant<double, const Identifier*> { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
+    return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
+}
 …
 ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> Optional<NumberParseResult>
+{
+    ASSERT(isASCIIDigit(m_current));
     // Optimization: most decimal values fit into 4 bytes.
     uint32_t decimalValue = 0;
 …
         do {
+            if (m_current == '_') {
+                if (UNLIKELY(!isASCIIDigit(peek(1))))
+                    return WTF::nullopt;
+                shift();
+            }
             decimalValue = decimalValue * 10 + (m_current - '0');
             digits[digit] = m_current;
             shift();
             --digit;
         } while (isASCIIDigit(m_current) && digit >= 0);
+        } while (isASCIIDigitOrSeparator(m_current) && digit >= 0);
         if (digit >= 0 && m_current != '.' && !isASCIIAlphaCaselessEqual(m_current, 'e') && m_current != 'n')
             return Variant<double, const Identifier*> { decimalValue };
+            return NumberParseResult { decimalValue };
         for (int i = maximumDigits - 1; i > digit; --i)
 …
+    }
+    while (isASCIIDigit(m_current)) {
+    while (isASCIIDigitOrSeparator(m_current)) {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIDigit(peek(1))))
+                return WTF::nullopt;
+            shift();
+        }
         record8(m_current);
         shift();
 …
     if (UNLIKELY(Options::useBigInt() && m_current == 'n'))
         return Variant<double, const Identifier*> { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
+        return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
     return WTF::nullopt;
 …
 template <typename T>
+ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
+{
+ALWAYS_INLINE bool Lexer<T>::parseNumberAfterDecimalPoint()
+{
+    ASSERT(isASCIIDigit(m_current));
     record8('.');
+    while (isASCIIDigit(m_current)) {
+    do {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIDigit(peek(1))))
+                return false;
+            shift();
+        }
         record8(m_current);
         shift();
+    }
+    } while (isASCIIDigitOrSeparator(m_current));
+    return true;
+}
 …
     do {
+        if (m_current == '_') {
+            if (UNLIKELY(!isASCIIDigit(peek(1))))
+                return false;
+            shift();
+        }
         record8(m_current);
         shift();
+    } while (isASCIIDigit(m_current));
+    } while (isASCIIDigitOrSeparator(m_current));
     return true;
+}
 …
             break;
+        }
+        parseNumberAfterDecimalPoint();
+        if (UNLIKELY(!parseNumberAfterDecimalPoint())) {
+            m_lexErrorMessage = "Non-number found after decimal point"_s;
+            token = INVALID_NUMERIC_LITERAL_ERRORTOK;
+            goto returnError;
+        }
         token = DOUBLE;
         if (isASCIIAlphaCaselessEqual(m_current, 'e')) {
 …
             auto parseNumberResult = parseHex();
+            if (WTF::holds_alternative<double>(parseNumberResult))
+                tokenData->doubleValue = WTF::get<double>(parseNumberResult);
+            if (!parseNumberResult)
+                tokenData->doubleValue = 0;
+            else if (WTF::holds_alternative<double>(*parseNumberResult))
+                tokenData->doubleValue = WTF::get<double>(*parseNumberResult);
             else {
                 token = BIGINT;
                 shift();
                 tokenData->bigIntString = WTF::get<const Identifier*>(parseNumberResult);
+                tokenData->bigIntString = WTF::get<const Identifier*>(*parseNumberResult);
                 tokenData->radix = 16;
+            }
 …
             m_buffer8.shrink(0);
             break;
+        }
+        if (UNLIKELY(m_current == '_')) {
+            m_lexErrorMessage = "Numeric literals may not begin with 0_"_s;
+            token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
+            goto returnError;
+        }
 …
                     if (m_current == '.') {
                         shift();
+                        parseNumberAfterDecimalPoint();
+                        if (UNLIKELY(isASCIIDigit(m_current) && !parseNumberAfterDecimalPoint())) {
+                            m_lexErrorMessage = "Non-number found after decimal point"_s;
+                            token = INVALID_NUMERIC_LITERAL_ERRORTOK;
+                            goto returnError;
+                        }
                         token = DOUBLE;
+                    }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 245634 in webkit for trunk/Source/JavaScriptCore/parser/Lexer.cpp

Legend:

trunk/Source/JavaScriptCore/parser/Lexer.cpp

Download in other formats: