source: webkit/trunk/Source/JavaScriptCore/parser/Lexer.cpp

Last change on this file was 291779, checked in by Chris Dumez, 3 years ago

Prepare JSC for making the String(const char*) constructor explicit
https://bugs.webkit.org/show_bug.cgi?id=238264

Reviewed by Geoff Garen.

Source/JavaScriptCore:

Prepare JSC for making the String(const char*) constructor explicit. Making this constructor
explicit helps catch many instances where we're constructing a String from a literal but we
are missing the ""_s suffix.

  • API/JSAPIGlobalObject.cpp:
  • API/JSAPIGlobalObject.mm:

(JSC::computeValidImportSpecifier):
(JSC::JSAPIGlobalObject::moduleLoaderFetch):

  • API/JSAPIValueWrapper.cpp:
  • API/JSAPIWrapperObject.mm:
  • API/JSCallbackConstructor.cpp:
  • API/JSCallbackFunction.cpp:
  • API/JSCallbackObject.cpp:
  • API/JSCallbackObjectFunctions.h:

(JSC::JSCallbackObject<Parent>::getOwnPropertySlot):

  • API/JSScript.mm:

(-[JSScript readCache]):

  • API/JSTypedArray.cpp:

(JSObjectMakeTypedArrayWithArrayBuffer):
(JSObjectMakeTypedArrayWithArrayBufferAndOffset):

  • API/JSValue.mm:

(createStructHandlerMap):
(handerForStructTag):

  • API/JSWrapperMap.mm:

(allocateConstructorForCustomClass):

  • API/ObjCCallbackFunction.mm:

(JSC::ObjCCallbackFunctionImpl::name):

  • API/glib/JSAPIWrapperGlobalObject.cpp:
  • API/glib/JSAPIWrapperObjectGLib.cpp:
  • API/glib/JSCCallbackFunction.cpp:
  • API/tests/JSONParseTest.cpp:

(testJSONParse):

  • bindings/ScriptFunctionCall.cpp:

(Deprecated::ScriptCallArgumentHandler::appendArgument):

  • bytecode/CodeBlock.cpp:
  • bytecode/EvalCodeBlock.cpp:
  • bytecode/ExecutableToCodeBlockEdge.cpp:
  • bytecode/ExitKind.cpp:

(JSC::exitKindToString):

  • bytecode/ExitKind.h:
  • bytecode/FunctionCodeBlock.cpp:
  • bytecode/ModuleProgramCodeBlock.cpp:
  • bytecode/ProgramCodeBlock.cpp:
  • bytecode/UnlinkedCodeBlock.cpp:
  • bytecode/UnlinkedEvalCodeBlock.cpp:
  • bytecode/UnlinkedFunctionCodeBlock.cpp:
  • bytecode/UnlinkedFunctionExecutable.cpp:
  • bytecode/UnlinkedModuleProgramCodeBlock.cpp:
  • bytecode/UnlinkedProgramCodeBlock.cpp:
  • bytecompiler/BytecodeGenerator.cpp:

(JSC::BytecodeGenerator::BytecodeGenerator):
(JSC::BytecodeGenerator::emitHasPrivateBrand):
(JSC::BytecodeGenerator::emitCheckPrivateBrand):
(JSC::BytecodeGenerator::emitReturn):

  • bytecompiler/NodesCodegen.cpp:

(JSC::PropertyListNode::emitSaveComputedFieldName):
(JSC::BaseDotNode::emitGetPropertyValue):
(JSC::BaseDotNode::emitPutProperty):
(JSC::PostfixNode::emitDot):
(JSC::DeleteBracketNode::emitBytecode):
(JSC::DeleteDotNode::emitBytecode):
(JSC::PrefixNode::emitDot):
(JSC::InstanceOfNode::emitBytecode):

  • debugger/DebuggerCallFrame.cpp:

(JSC::DebuggerCallFrame::functionName const):

  • debugger/DebuggerScope.cpp:
  • heap/HeapSnapshotBuilder.cpp:

(JSC::HeapSnapshotBuilder::json):

  • heap/RootMarkReason.cpp:

(JSC::rootMarkReasonDescription):

  • heap/RootMarkReason.h:
  • inspector/JSGlobalObjectInspectorController.cpp:

(Inspector::JSGlobalObjectInspectorController::appendAPIBacktrace):

  • inspector/JSInjectedScriptHost.cpp:

(Inspector::JSInjectedScriptHost::getInternalProperties):

  • inspector/JSInjectedScriptHostPrototype.cpp:
  • inspector/JSJavaScriptCallFrame.cpp:
  • inspector/JSJavaScriptCallFramePrototype.cpp:
  • inspector/agents/InspectorAuditAgent.cpp:

(Inspector::InspectorAuditAgent::run):

  • inspector/agents/InspectorDebuggerAgent.cpp:

(Inspector::InspectorDebuggerAgent::getScriptSource):

  • inspector/agents/InspectorDebuggerAgent.h:
  • inspector/agents/InspectorHeapAgent.cpp:

(Inspector::InspectorHeapAgent::getRemoteObject):

  • jit/ExecutableAllocator.cpp:

(JSC::dumpJITMemory):

  • jit/JITCode.cpp:

(JSC::JITCode::typeName):

  • jit/JITCode.h:
  • jit/JITOperations.cpp:

(JSC::getWrappedValue):

  • jsc.cpp:

(toCString):
(JSC_DEFINE_HOST_FUNCTION):
(runWithOptions):
(CommandLine::parseArguments):

  • llint/LLIntSlowPaths.cpp:

(JSC::LLInt::handleVarargsCheckpoint):

  • parser/Lexer.cpp:

(JSC::Lexer<T>::scanRegExp):

  • parser/Parser.cpp:

(JSC::Parser<LexerType>::parsePrimaryExpression):

  • runtime/AbstractModuleRecord.cpp:
  • runtime/AggregateErrorConstructor.cpp:
  • runtime/ArrayConstructor.cpp:
  • runtime/ArrayIteratorPrototype.cpp:
  • runtime/ArrayPrototype.cpp:
  • runtime/AsyncFromSyncIteratorPrototype.cpp:
  • runtime/AsyncFunctionConstructor.cpp:
  • runtime/AsyncFunctionPrototype.cpp:
  • runtime/AsyncGeneratorFunctionConstructor.cpp:
  • runtime/AsyncGeneratorFunctionPrototype.cpp:
  • runtime/AsyncGeneratorPrototype.cpp:
  • runtime/AsyncIteratorPrototype.cpp:
  • runtime/AtomicsObject.cpp:
  • runtime/BigIntConstructor.cpp:
  • runtime/BigIntObject.cpp:
  • runtime/BigIntPrototype.cpp:
  • runtime/BooleanConstructor.cpp:
  • runtime/BooleanObject.cpp:
  • runtime/BooleanPrototype.cpp:
  • runtime/BytecodeCacheError.cpp:

(JSC::BytecodeCacheError::StandardError::message const):

  • runtime/CallData.cpp:

(JSC::call):

  • runtime/CallData.h:
  • runtime/ClassInfo.h:
  • runtime/ClonedArguments.cpp:
  • runtime/CodeCache.cpp:

(JSC::CodeCache::getUnlinkedGlobalFunctionExecutable):

  • runtime/Completion.cpp:

(JSC::checkModuleSyntax):
(JSC::createSymbolForEntryPointModule):

  • runtime/ConsoleObject.cpp:
  • runtime/ConstructData.cpp:

(JSC::construct):

  • runtime/ConstructData.h:
  • runtime/CustomGetterSetter.cpp:
  • runtime/DOMAttributeGetterSetter.cpp:
  • runtime/DateConstructor.cpp:
  • runtime/DateInstance.cpp:
  • runtime/DatePrototype.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):

  • runtime/DirectArguments.cpp:
  • runtime/Error.h:

(JSC::throwVMError):

  • runtime/ErrorConstructor.cpp:
  • runtime/ErrorInstance.cpp:
  • runtime/ErrorPrototype.cpp:
  • runtime/EvalExecutable.cpp:
  • runtime/Exception.cpp:
  • runtime/ExceptionHelpers.cpp:

(JSC::notAFunctionSourceAppender):
(JSC::invalidParameterInSourceAppender):
(JSC::invalidParameterInstanceofSourceAppender):
(JSC::invalidPrototypeSourceAppender):
(JSC::createTDZError):
(JSC::createInvalidPrivateNameError):
(JSC::createRedefinedPrivateNameError):
(JSC::createPrivateMethodAccessError):
(JSC::createReinstallPrivateMethodError):

  • runtime/ExecutableBase.cpp:
  • runtime/FinalizationRegistryConstructor.cpp:
  • runtime/FinalizationRegistryPrototype.cpp:
  • runtime/FunctionConstructor.cpp:
  • runtime/FunctionExecutable.cpp:

(JSC::FunctionExecutable::toStringSlow):

  • runtime/FunctionPrototype.cpp:
  • runtime/FunctionRareData.cpp:
  • runtime/GeneratorFunctionConstructor.cpp:
  • runtime/GeneratorFunctionPrototype.cpp:
  • runtime/GeneratorPrototype.cpp:
  • runtime/GetterSetter.cpp:
  • runtime/GlobalExecutable.cpp:
  • runtime/HashMapImpl.cpp:
  • runtime/InternalFunction.cpp:
  • runtime/IntlCollator.cpp:

(JSC::IntlCollator::initializeCollator):

  • runtime/IntlCollatorConstructor.cpp:
  • runtime/IntlCollatorPrototype.cpp:
  • runtime/IntlDateTimeFormat.cpp:

(JSC::IntlDateTimeFormat::initializeDateTimeFormat):

  • runtime/IntlDateTimeFormatConstructor.cpp:
  • runtime/IntlDateTimeFormatPrototype.cpp:
  • runtime/IntlDisplayNames.cpp:
  • runtime/IntlDisplayNamesConstructor.cpp:
  • runtime/IntlDisplayNamesPrototype.cpp:
  • runtime/IntlListFormat.cpp:
  • runtime/IntlListFormatConstructor.cpp:
  • runtime/IntlListFormatPrototype.cpp:
  • runtime/IntlLocale.cpp:

(JSC::IntlLocale::initializeLocale):

  • runtime/IntlLocaleConstructor.cpp:
  • runtime/IntlLocalePrototype.cpp:
  • runtime/IntlNumberFormat.cpp:

(JSC::IntlNumberFormat::initializeNumberFormat):

  • runtime/IntlNumberFormatConstructor.cpp:
  • runtime/IntlNumberFormatPrototype.cpp:
  • runtime/IntlObject.cpp:

(JSC::intlStringOption):

  • runtime/IntlObject.h:
  • runtime/IntlPluralRules.cpp:
  • runtime/IntlPluralRulesConstructor.cpp:
  • runtime/IntlPluralRulesPrototype.cpp:
  • runtime/IntlRelativeTimeFormat.cpp:

(JSC::IntlRelativeTimeFormat::initializeRelativeTimeFormat):

  • runtime/IntlRelativeTimeFormatConstructor.cpp:
  • runtime/IntlRelativeTimeFormatPrototype.cpp:
  • runtime/IntlSegmentIterator.cpp:
  • runtime/IntlSegmentIteratorPrototype.cpp:
  • runtime/IntlSegmenter.cpp:
  • runtime/IntlSegmenterConstructor.cpp:
  • runtime/IntlSegmenterPrototype.cpp:
  • runtime/IntlSegments.cpp:
  • runtime/IntlSegmentsPrototype.cpp:
  • runtime/IteratorPrototype.cpp:
  • runtime/JSArray.cpp:

(JSC::JSArray::tryCreateUninitializedRestricted):

  • runtime/JSArrayBuffer.cpp:
  • runtime/JSArrayBufferConstructor.cpp:
  • runtime/JSArrayBufferPrototype.cpp:
  • runtime/JSArrayBufferView.cpp:
  • runtime/JSArrayIterator.cpp:
  • runtime/JSAsyncFunction.cpp:
  • runtime/JSAsyncGenerator.cpp:
  • runtime/JSAsyncGeneratorFunction.cpp:
  • runtime/JSBigInt.cpp:

(JSC::JSBigInt::parseInt):

  • runtime/JSBoundFunction.cpp:
  • runtime/JSCallee.cpp:
  • runtime/JSCell.cpp:

(JSC::JSCell::dumpToStream):
(JSC::JSCell::className const):

  • runtime/JSCell.h:
  • runtime/JSCustomGetterFunction.cpp:
  • runtime/JSCustomSetterFunction.cpp:
  • runtime/JSDataView.cpp:
  • runtime/JSDataViewPrototype.cpp:

(JSC::JSC_DEFINE_CUSTOM_GETTER):

  • runtime/JSFinalizationRegistry.cpp:

(JSC::JSFinalizationRegistry::runFinalizationCleanup):

  • runtime/JSFunction.cpp:
  • runtime/JSGenerator.cpp:
  • runtime/JSGeneratorFunction.cpp:
  • runtime/JSGenericTypedArrayViewInlines.h:

(JSC::JSGenericTypedArrayView<Adaptor>::create):
(JSC::JSGenericTypedArrayView<Adaptor>::validateRange):

  • runtime/JSGenericTypedArrayViewPrototypeFunctions.h:

(JSC::speciesConstruct):
(JSC::genericTypedArrayViewProtoFuncSet):
(JSC::genericTypedArrayViewPrivateFuncSubarrayCreate):

  • runtime/JSGlobalLexicalEnvironment.cpp:
  • runtime/JSGlobalObject.cpp:
  • runtime/JSImmutableButterfly.cpp:
  • runtime/JSInternalPromise.cpp:
  • runtime/JSInternalPromiseConstructor.cpp:
  • runtime/JSInternalPromisePrototype.cpp:
  • runtime/JSLexicalEnvironment.cpp:
  • runtime/JSMap.cpp:
  • runtime/JSMapIterator.cpp:
  • runtime/JSModuleEnvironment.cpp:
  • runtime/JSModuleLoader.cpp:
  • runtime/JSModuleNamespaceObject.cpp:
  • runtime/JSModuleRecord.cpp:
  • runtime/JSNativeStdFunction.cpp:
  • runtime/JSONObject.cpp:
  • runtime/JSObject.cpp:

(JSC::JSObject::markAuxiliaryAndVisitOutOfLineProperties):

  • runtime/JSPromise.cpp:
  • runtime/JSPromiseConstructor.cpp:
  • runtime/JSPromisePrototype.cpp:
  • runtime/JSPropertyNameEnumerator.cpp:
  • runtime/JSProxy.cpp:
  • runtime/JSRemoteFunction.cpp:

(JSC::wrapArgument):
(JSC::wrapReturnValue):
(JSC::JSRemoteFunction::finishCreation):

  • runtime/JSScope.cpp:
  • runtime/JSScriptFetchParameters.cpp:
  • runtime/JSScriptFetcher.cpp:
  • runtime/JSSegmentedVariableObject.cpp:
  • runtime/JSSet.cpp:
  • runtime/JSSetIterator.cpp:
  • runtime/JSSourceCode.cpp:
  • runtime/JSString.cpp:

(JSC::JSString::dumpToStream):

  • runtime/JSStringIterator.cpp:
  • runtime/JSSymbolTableObject.cpp:
  • runtime/JSTemplateObjectDescriptor.cpp:
  • runtime/JSTypedArrayConstructors.cpp:
  • runtime/JSTypedArrayPrototypes.cpp:
  • runtime/JSTypedArrayViewConstructor.cpp:
  • runtime/JSTypedArrayViewPrototype.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):

  • runtime/JSTypedArrays.cpp:
  • runtime/JSWeakMap.cpp:
  • runtime/JSWeakObjectRef.cpp:
  • runtime/JSWeakSet.cpp:
  • runtime/JSWithScope.cpp:
  • runtime/LiteralParser.cpp:

(JSC::LiteralParser<CharType>::parsePrimitiveValue):

  • runtime/MapConstructor.cpp:
  • runtime/MapIteratorPrototype.cpp:
  • runtime/MapPrototype.cpp:
  • runtime/MathObject.cpp:
  • runtime/ModuleProgramExecutable.cpp:
  • runtime/NativeErrorConstructor.cpp:
  • runtime/NativeExecutable.cpp:
  • runtime/NullGetterFunction.cpp:
  • runtime/NullSetterFunction.cpp:
  • runtime/NumberConstructor.cpp:
  • runtime/NumberObject.cpp:
  • runtime/NumberPrototype.cpp:

(JSC::toStringWithRadix):
(JSC::JSC_DEFINE_HOST_FUNCTION):
(JSC::numberToStringInternal):
(JSC::int52ToString):

  • runtime/ObjectConstructor.cpp:
  • runtime/ObjectPrototype.cpp:
  • runtime/Operations.h:

(JSC::arithmeticBinaryOp):
(JSC::shift):
(JSC::bitwiseBinaryOp):

  • runtime/ProgramExecutable.cpp:
  • runtime/PropertyTable.cpp:
  • runtime/ProxyConstructor.cpp:

(JSC::ProxyConstructor::finishCreation):

  • runtime/ProxyObject.cpp:
  • runtime/ProxyRevoke.cpp:
  • runtime/ReflectObject.cpp:
  • runtime/RegExp.cpp:
  • runtime/RegExpCache.cpp:

(JSC::RegExpCache::ensureEmptyRegExpSlow):

  • runtime/RegExpConstructor.cpp:
  • runtime/RegExpObject.cpp:
  • runtime/RegExpPrototype.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):

  • runtime/RegExpStringIteratorPrototype.cpp:
  • runtime/ScopedArguments.cpp:
  • runtime/ScopedArgumentsTable.cpp:
  • runtime/ScriptExecutable.cpp:
  • runtime/SetConstructor.cpp:
  • runtime/SetIteratorPrototype.cpp:
  • runtime/SetPrototype.cpp:
  • runtime/ShadowRealmConstructor.cpp:
  • runtime/ShadowRealmObject.cpp:
  • runtime/ShadowRealmPrototype.cpp:
  • runtime/SparseArrayValueMap.cpp:
  • runtime/StrictEvalActivation.cpp:
  • runtime/StringConstructor.cpp:
  • runtime/StringIteratorPrototype.cpp:
  • runtime/StringObject.cpp:
  • runtime/StringPrototype.cpp:

(JSC::toLocaleCase):
(JSC::JSC_DEFINE_HOST_FUNCTION):

  • runtime/Structure.cpp:
  • runtime/StructureChain.cpp:
  • runtime/StructureRareData.cpp:
  • runtime/Symbol.cpp:
  • runtime/SymbolConstructor.cpp:
  • runtime/SymbolObject.cpp:
  • runtime/SymbolPrototype.cpp:
  • runtime/SymbolTable.cpp:
  • runtime/TemporalCalendar.cpp:
  • runtime/TemporalCalendarConstructor.cpp:
  • runtime/TemporalCalendarPrototype.cpp:
  • runtime/TemporalDuration.cpp:

(JSC::TemporalDuration::total const):

  • runtime/TemporalDurationConstructor.cpp:
  • runtime/TemporalDurationPrototype.cpp:
  • runtime/TemporalInstant.cpp:
  • runtime/TemporalInstantConstructor.cpp:
  • runtime/TemporalInstantPrototype.cpp:
  • runtime/TemporalNow.cpp:
  • runtime/TemporalObject.cpp:

(JSC::temporalLargestUnit):
(JSC::temporalSmallestUnit):

  • runtime/TemporalPlainDate.cpp:
  • runtime/TemporalPlainDateConstructor.cpp:
  • runtime/TemporalPlainDatePrototype.cpp:
  • runtime/TemporalPlainTime.cpp:
  • runtime/TemporalPlainTimeConstructor.cpp:
  • runtime/TemporalPlainTimePrototype.cpp:
  • runtime/TemporalTimeZone.cpp:
  • runtime/TemporalTimeZoneConstructor.cpp:
  • runtime/TemporalTimeZonePrototype.cpp:
  • runtime/TypeProfiler.cpp:

(JSC::TypeProfiler::logTypesForTypeLocation):

  • runtime/TypeSet.cpp:

(JSC::StructureShape::propertyHash):

  • runtime/WeakMapConstructor.cpp:
  • runtime/WeakMapPrototype.cpp:

(JSC::WeakMapPrototype::finishCreation):

  • runtime/WeakObjectRefConstructor.cpp:
  • runtime/WeakObjectRefPrototype.cpp:
  • runtime/WeakSetConstructor.cpp:
  • runtime/WeakSetPrototype.cpp:

(JSC::WeakSetPrototype::finishCreation):

  • testRegExp.cpp:
  • tools/FunctionAllowlist.cpp:

(JSC::FunctionAllowlist::FunctionAllowlist):

  • tools/FunctionOverrides.cpp:

(JSC::initializeOverrideInfo):

  • tools/JSDollarVM.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):
(JSC::JSC_DEFINE_HOST_FUNCTION_WITH_ATTRIBUTES):

  • tools/SigillCrashAnalyzer.cpp:

(JSC::SigillCrashAnalyzer::analyze):

  • tools/VMInspector.cpp:

(JSC::VMInspector::dumpRegisters):
(JSC::VMInspector::dumpCellMemoryToStream):

  • wasm/WasmExceptionType.h:

(JSC::Wasm::errorMessageForExceptionType):

  • wasm/WasmFormat.h:

(JSC::Wasm::typeToString):

  • wasm/WasmGlobal.cpp:

(JSC::Wasm::Global::set):

  • wasm/WasmLLIntPlan.cpp:

(JSC::Wasm::LLIntPlan::didCompleteCompilation):

  • wasm/WasmOperations.cpp:

(JSC::Wasm::JSC_DEFINE_JIT_OPERATION):

  • wasm/js/JSToWasmICCallee.cpp:
  • wasm/js/JSWebAssembly.cpp:
  • wasm/js/JSWebAssemblyException.cpp:
  • wasm/js/JSWebAssemblyGlobal.cpp:

(JSC::JSWebAssemblyGlobal::type):

  • wasm/js/JSWebAssemblyHelpers.h:

(JSC::fromJSValue):

  • wasm/js/JSWebAssemblyInstance.cpp:

(JSC::JSWebAssemblyInstance::createPrivateModuleKey):
(JSC::JSWebAssemblyInstance::tryCreate):

  • wasm/js/JSWebAssemblyMemory.cpp:
  • wasm/js/JSWebAssemblyModule.cpp:
  • wasm/js/JSWebAssemblyTable.cpp:

(JSC::JSWebAssemblyTable::type):

  • wasm/js/JSWebAssemblyTag.cpp:
  • wasm/js/WebAssemblyCompileErrorConstructor.cpp:
  • wasm/js/WebAssemblyCompileErrorPrototype.cpp:
  • wasm/js/WebAssemblyExceptionConstructor.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):

  • wasm/js/WebAssemblyExceptionPrototype.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):

  • wasm/js/WebAssemblyFunction.cpp:
  • wasm/js/WebAssemblyFunctionBase.cpp:
  • wasm/js/WebAssemblyGlobalConstructor.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):

  • wasm/js/WebAssemblyGlobalPrototype.cpp:
  • wasm/js/WebAssemblyInstanceConstructor.cpp:
  • wasm/js/WebAssemblyInstancePrototype.cpp:
  • wasm/js/WebAssemblyLinkErrorConstructor.cpp:
  • wasm/js/WebAssemblyLinkErrorPrototype.cpp:
  • wasm/js/WebAssemblyMemoryConstructor.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):

  • wasm/js/WebAssemblyMemoryPrototype.cpp:
  • wasm/js/WebAssemblyModuleConstructor.cpp:
  • wasm/js/WebAssemblyModulePrototype.cpp:
  • wasm/js/WebAssemblyModuleRecord.cpp:

(JSC::WebAssemblyModuleRecord::initializeExports):

  • wasm/js/WebAssemblyRuntimeErrorConstructor.cpp:
  • wasm/js/WebAssemblyRuntimeErrorPrototype.cpp:
  • wasm/js/WebAssemblyTableConstructor.cpp:

(JSC::JSC_DEFINE_HOST_FUNCTION):

  • wasm/js/WebAssemblyTablePrototype.cpp:
  • wasm/js/WebAssemblyTagConstructor.cpp:
  • wasm/js/WebAssemblyTagPrototype.cpp:
  • wasm/js/WebAssemblyWrapperFunction.cpp:

(JSC::WebAssemblyWrapperFunction::create):

  • yarr/RegularExpression.cpp:

(JSC::Yarr::RegularExpression::Private::compile):

  • yarr/YarrErrorCode.cpp:

(JSC::Yarr::errorMessage):

  • yarr/YarrErrorCode.h:

Source/WebCore:

  • bindings/js/JSDOMGlobalObject.cpp:
  • bindings/js/JSDOMWindowBase.cpp:
  • bindings/js/JSDOMWindowProperties.cpp:
  • bindings/js/JSFileSystemDirectoryHandleIterator.cpp:
  • bindings/js/JSIDBSerializationGlobalObject.cpp:
  • bindings/js/JSRemoteDOMWindowBase.cpp:
  • bindings/js/JSShadowRealmGlobalScopeBase.cpp:
  • bindings/js/JSWindowProxy.cpp:
  • bindings/js/JSWorkerGlobalScopeBase.cpp:
  • bindings/js/JSWorkletGlobalScopeBase.cpp:
  • bindings/scripts/CodeGeneratorJS.pm:

(GenerateImplementation):

  • bridge/objc/ObjCRuntimeObject.mm:
  • bridge/objc/objc_instance.mm:

(JSC::Bindings::ObjcInstance::getMethod):

  • bridge/objc/objc_runtime.mm:
  • bridge/runtime_array.cpp:
  • bridge/runtime_method.cpp:
  • bridge/runtime_object.cpp:

Source/WTF:

  • wtf/ResourceUsage.h:
  • wtf/cocoa/ResourceUsageCocoa.cpp:

(WTF::displayNameForVMTag):

  • wtf/text/WTFString.cpp:

(WTF::String::numberToStringFixedPrecision):
(WTF::String::number):
(WTF::String::numberToStringFixedWidth):

  • wtf/text/WTFString.h:
  • Property svn:eol-style set to native
File size: 93.9 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten ([email protected])
3 * Copyright (C) 2006-2019 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich ([email protected])
5 * Copyright (C) 2010 Zoltan Herczeg ([email protected])
6 * Copyright (C) 2012 Mathias Bynens ([email protected])
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25#include "config.h"
26#include "Lexer.h"
27
28#include "BuiltinNames.h"
29#include "Identifier.h"
30#include "KeywordLookup.h"
31#include "Lexer.lut.h"
32#include "ParseInt.h"
33#include <limits.h>
34#include <string.h>
35#include <variant>
36#include <wtf/Assertions.h>
37#include <wtf/HexNumber.h>
38#include <wtf/dtoa.h>
39
40namespace JSC {
41
// Reports whether |identifier| has an entry in JSC::mainTable: the result of
// mainTable.entry(identifier) is converted to bool and returned.
// NOTE(review): mainTable is presumably the keyword table generated into
// Lexer.lut.h (included above) -- confirm against that header.
42bool isLexerKeyword(const Identifier& identifier)
43{
44 return JSC::mainTable.entry(identifier);
45}
46
// Classification of a single character; typesOfLatin1Characters maps every
// Latin-1 code to one of these values. The underlying type is uint8_t.
// Enumerator values are implicit, so the declaration order below is significant
// (see the note on the first three enumerators).
47enum CharacterType : uint8_t {
48 // Types for the main switch
49
50 // The first three types are fixed, and also used for identifying
51 // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
52 CharacterIdentifierStart,
53 CharacterZero,
54 CharacterNumber,
55
56 // For single-byte characters grandfathered into Other_ID_Continue -- namely just U+00B7 MIDDLE DOT.
57 // (http://unicode.org/reports/tr31/#Backward_Compatibility)
58 CharacterOtherIdentifierPart,
59
60 CharacterInvalid,
61 CharacterLineTerminator,
62 CharacterExclamationMark,
63 CharacterOpenParen,
64 CharacterCloseParen,
65 CharacterOpenBracket,
66 CharacterCloseBracket,
67 CharacterComma,
68 CharacterColon,
69 CharacterQuestion,
70 CharacterTilde,
71 CharacterQuote,
72 CharacterBackQuote,
73 CharacterDot,
74 CharacterSlash,
75 CharacterBackSlash,
76 CharacterSemicolon,
77 CharacterOpenBrace,
78 CharacterCloseBrace,
79
80 CharacterAdd,
81 CharacterSub,
82 CharacterMultiply,
83 CharacterModulo,
84 CharacterAnd,
85 CharacterXor,
86 CharacterOr,
87 CharacterLess,
88 CharacterGreater,
89 CharacterEqual,
90
91 // Other types
92 CharacterWhiteSpace,
93 CharacterHash,
94 CharacterPrivateIdentifierStart
95};
96
97// 256 Latin-1 codes
// Lookup table giving the CharacterType of each Latin-1 character; the array
// index is the character code (0-255), as shown in the per-entry comments.
// Summary of the entries below:
//  - '$', '_', the ASCII letters and the Latin-1 letter codes are
//    CharacterIdentifierStart; '0' is CharacterZero and '1'-'9' are CharacterNumber.
//  - 183 is the only CharacterOtherIdentifierPart entry.
//  - '#' is CharacterHash and '@' is CharacterPrivateIdentifierStart.
//  - 9, 11, 12, 32 and 160 are CharacterWhiteSpace; 10 and 13 are
//    CharacterLineTerminator.
//  - Both '"' and '\'' map to CharacterQuote; '`' is CharacterBackQuote.
//  - The remaining control codes and non-letter symbols are CharacterInvalid.
98static constexpr const CharacterType typesOfLatin1Characters[256] = {
99/* 0 - Null */ CharacterInvalid,
100/* 1 - Start of Heading */ CharacterInvalid,
101/* 2 - Start of Text */ CharacterInvalid,
102/* 3 - End of Text */ CharacterInvalid,
103/* 4 - End of Transm. */ CharacterInvalid,
104/* 5 - Enquiry */ CharacterInvalid,
105/* 6 - Acknowledgment */ CharacterInvalid,
106/* 7 - Bell */ CharacterInvalid,
107/* 8 - Back Space */ CharacterInvalid,
108/* 9 - Horizontal Tab */ CharacterWhiteSpace,
109/* 10 - Line Feed */ CharacterLineTerminator,
110/* 11 - Vertical Tab */ CharacterWhiteSpace,
111/* 12 - Form Feed */ CharacterWhiteSpace,
112/* 13 - Carriage Return */ CharacterLineTerminator,
113/* 14 - Shift Out */ CharacterInvalid,
114/* 15 - Shift In */ CharacterInvalid,
115/* 16 - Data Line Escape */ CharacterInvalid,
116/* 17 - Device Control 1 */ CharacterInvalid,
117/* 18 - Device Control 2 */ CharacterInvalid,
118/* 19 - Device Control 3 */ CharacterInvalid,
119/* 20 - Device Control 4 */ CharacterInvalid,
120/* 21 - Negative Ack. */ CharacterInvalid,
121/* 22 - Synchronous Idle */ CharacterInvalid,
122/* 23 - End of Transmit */ CharacterInvalid,
123/* 24 - Cancel */ CharacterInvalid,
124/* 25 - End of Medium */ CharacterInvalid,
125/* 26 - Substitute */ CharacterInvalid,
126/* 27 - Escape */ CharacterInvalid,
127/* 28 - File Separator */ CharacterInvalid,
128/* 29 - Group Separator */ CharacterInvalid,
129/* 30 - Record Separator */ CharacterInvalid,
130/* 31 - Unit Separator */ CharacterInvalid,
131/* 32 - Space */ CharacterWhiteSpace,
132/* 33 - ! */ CharacterExclamationMark,
133/* 34 - " */ CharacterQuote,
134/* 35 - # */ CharacterHash,
135/* 36 - $ */ CharacterIdentifierStart,
136/* 37 - % */ CharacterModulo,
137/* 38 - & */ CharacterAnd,
138/* 39 - ' */ CharacterQuote,
139/* 40 - ( */ CharacterOpenParen,
140/* 41 - ) */ CharacterCloseParen,
141/* 42 - * */ CharacterMultiply,
142/* 43 - + */ CharacterAdd,
143/* 44 - , */ CharacterComma,
144/* 45 - - */ CharacterSub,
145/* 46 - . */ CharacterDot,
146/* 47 - / */ CharacterSlash,
147/* 48 - 0 */ CharacterZero,
148/* 49 - 1 */ CharacterNumber,
149/* 50 - 2 */ CharacterNumber,
150/* 51 - 3 */ CharacterNumber,
151/* 52 - 4 */ CharacterNumber,
152/* 53 - 5 */ CharacterNumber,
153/* 54 - 6 */ CharacterNumber,
154/* 55 - 7 */ CharacterNumber,
155/* 56 - 8 */ CharacterNumber,
156/* 57 - 9 */ CharacterNumber,
157/* 58 - : */ CharacterColon,
158/* 59 - ; */ CharacterSemicolon,
159/* 60 - < */ CharacterLess,
160/* 61 - = */ CharacterEqual,
161/* 62 - > */ CharacterGreater,
162/* 63 - ? */ CharacterQuestion,
163/* 64 - @ */ CharacterPrivateIdentifierStart,
164/* 65 - A */ CharacterIdentifierStart,
165/* 66 - B */ CharacterIdentifierStart,
166/* 67 - C */ CharacterIdentifierStart,
167/* 68 - D */ CharacterIdentifierStart,
168/* 69 - E */ CharacterIdentifierStart,
169/* 70 - F */ CharacterIdentifierStart,
170/* 71 - G */ CharacterIdentifierStart,
171/* 72 - H */ CharacterIdentifierStart,
172/* 73 - I */ CharacterIdentifierStart,
173/* 74 - J */ CharacterIdentifierStart,
174/* 75 - K */ CharacterIdentifierStart,
175/* 76 - L */ CharacterIdentifierStart,
176/* 77 - M */ CharacterIdentifierStart,
177/* 78 - N */ CharacterIdentifierStart,
178/* 79 - O */ CharacterIdentifierStart,
179/* 80 - P */ CharacterIdentifierStart,
180/* 81 - Q */ CharacterIdentifierStart,
181/* 82 - R */ CharacterIdentifierStart,
182/* 83 - S */ CharacterIdentifierStart,
183/* 84 - T */ CharacterIdentifierStart,
184/* 85 - U */ CharacterIdentifierStart,
185/* 86 - V */ CharacterIdentifierStart,
186/* 87 - W */ CharacterIdentifierStart,
187/* 88 - X */ CharacterIdentifierStart,
188/* 89 - Y */ CharacterIdentifierStart,
189/* 90 - Z */ CharacterIdentifierStart,
190/* 91 - [ */ CharacterOpenBracket,
191/* 92 - \ */ CharacterBackSlash,
192/* 93 - ] */ CharacterCloseBracket,
193/* 94 - ^ */ CharacterXor,
194/* 95 - _ */ CharacterIdentifierStart,
195/* 96 - ` */ CharacterBackQuote,
196/* 97 - a */ CharacterIdentifierStart,
197/* 98 - b */ CharacterIdentifierStart,
198/* 99 - c */ CharacterIdentifierStart,
199/* 100 - d */ CharacterIdentifierStart,
200/* 101 - e */ CharacterIdentifierStart,
201/* 102 - f */ CharacterIdentifierStart,
202/* 103 - g */ CharacterIdentifierStart,
203/* 104 - h */ CharacterIdentifierStart,
204/* 105 - i */ CharacterIdentifierStart,
205/* 106 - j */ CharacterIdentifierStart,
206/* 107 - k */ CharacterIdentifierStart,
207/* 108 - l */ CharacterIdentifierStart,
208/* 109 - m */ CharacterIdentifierStart,
209/* 110 - n */ CharacterIdentifierStart,
210/* 111 - o */ CharacterIdentifierStart,
211/* 112 - p */ CharacterIdentifierStart,
212/* 113 - q */ CharacterIdentifierStart,
213/* 114 - r */ CharacterIdentifierStart,
214/* 115 - s */ CharacterIdentifierStart,
215/* 116 - t */ CharacterIdentifierStart,
216/* 117 - u */ CharacterIdentifierStart,
217/* 118 - v */ CharacterIdentifierStart,
218/* 119 - w */ CharacterIdentifierStart,
219/* 120 - x */ CharacterIdentifierStart,
220/* 121 - y */ CharacterIdentifierStart,
221/* 122 - z */ CharacterIdentifierStart,
222/* 123 - { */ CharacterOpenBrace,
223/* 124 - | */ CharacterOr,
224/* 125 - } */ CharacterCloseBrace,
225/* 126 - ~ */ CharacterTilde,
226/* 127 - Delete */ CharacterInvalid,
227/* 128 - Cc category */ CharacterInvalid,
228/* 129 - Cc category */ CharacterInvalid,
229/* 130 - Cc category */ CharacterInvalid,
230/* 131 - Cc category */ CharacterInvalid,
231/* 132 - Cc category */ CharacterInvalid,
232/* 133 - Cc category */ CharacterInvalid,
233/* 134 - Cc category */ CharacterInvalid,
234/* 135 - Cc category */ CharacterInvalid,
235/* 136 - Cc category */ CharacterInvalid,
236/* 137 - Cc category */ CharacterInvalid,
237/* 138 - Cc category */ CharacterInvalid,
238/* 139 - Cc category */ CharacterInvalid,
239/* 140 - Cc category */ CharacterInvalid,
240/* 141 - Cc category */ CharacterInvalid,
241/* 142 - Cc category */ CharacterInvalid,
242/* 143 - Cc category */ CharacterInvalid,
243/* 144 - Cc category */ CharacterInvalid,
244/* 145 - Cc category */ CharacterInvalid,
245/* 146 - Cc category */ CharacterInvalid,
246/* 147 - Cc category */ CharacterInvalid,
247/* 148 - Cc category */ CharacterInvalid,
248/* 149 - Cc category */ CharacterInvalid,
249/* 150 - Cc category */ CharacterInvalid,
250/* 151 - Cc category */ CharacterInvalid,
251/* 152 - Cc category */ CharacterInvalid,
252/* 153 - Cc category */ CharacterInvalid,
253/* 154 - Cc category */ CharacterInvalid,
254/* 155 - Cc category */ CharacterInvalid,
255/* 156 - Cc category */ CharacterInvalid,
256/* 157 - Cc category */ CharacterInvalid,
257/* 158 - Cc category */ CharacterInvalid,
258/* 159 - Cc category */ CharacterInvalid,
259/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
260/* 161 - Po category */ CharacterInvalid,
261/* 162 - Sc category */ CharacterInvalid,
262/* 163 - Sc category */ CharacterInvalid,
263/* 164 - Sc category */ CharacterInvalid,
264/* 165 - Sc category */ CharacterInvalid,
265/* 166 - So category */ CharacterInvalid,
266/* 167 - So category */ CharacterInvalid,
267/* 168 - Sk category */ CharacterInvalid,
268/* 169 - So category */ CharacterInvalid,
269/* 170 - Ll category */ CharacterIdentifierStart,
270/* 171 - Pi category */ CharacterInvalid,
271/* 172 - Sm category */ CharacterInvalid,
272/* 173 - Cf category */ CharacterInvalid,
273/* 174 - So category */ CharacterInvalid,
274/* 175 - Sk category */ CharacterInvalid,
275/* 176 - So category */ CharacterInvalid,
276/* 177 - Sm category */ CharacterInvalid,
277/* 178 - No category */ CharacterInvalid,
278/* 179 - No category */ CharacterInvalid,
279/* 180 - Sk category */ CharacterInvalid,
280/* 181 - Ll category */ CharacterIdentifierStart,
281/* 182 - So category */ CharacterInvalid,
282/* 183 - Po category */ CharacterOtherIdentifierPart,
283/* 184 - Sk category */ CharacterInvalid,
284/* 185 - No category */ CharacterInvalid,
285/* 186 - Ll category */ CharacterIdentifierStart,
286/* 187 - Pf category */ CharacterInvalid,
287/* 188 - No category */ CharacterInvalid,
288/* 189 - No category */ CharacterInvalid,
289/* 190 - No category */ CharacterInvalid,
290/* 191 - Po category */ CharacterInvalid,
291/* 192 - Lu category */ CharacterIdentifierStart,
292/* 193 - Lu category */ CharacterIdentifierStart,
293/* 194 - Lu category */ CharacterIdentifierStart,
294/* 195 - Lu category */ CharacterIdentifierStart,
295/* 196 - Lu category */ CharacterIdentifierStart,
296/* 197 - Lu category */ CharacterIdentifierStart,
297/* 198 - Lu category */ CharacterIdentifierStart,
298/* 199 - Lu category */ CharacterIdentifierStart,
299/* 200 - Lu category */ CharacterIdentifierStart,
300/* 201 - Lu category */ CharacterIdentifierStart,
301/* 202 - Lu category */ CharacterIdentifierStart,
302/* 203 - Lu category */ CharacterIdentifierStart,
303/* 204 - Lu category */ CharacterIdentifierStart,
304/* 205 - Lu category */ CharacterIdentifierStart,
305/* 206 - Lu category */ CharacterIdentifierStart,
306/* 207 - Lu category */ CharacterIdentifierStart,
307/* 208 - Lu category */ CharacterIdentifierStart,
308/* 209 - Lu category */ CharacterIdentifierStart,
309/* 210 - Lu category */ CharacterIdentifierStart,
310/* 211 - Lu category */ CharacterIdentifierStart,
311/* 212 - Lu category */ CharacterIdentifierStart,
312/* 213 - Lu category */ CharacterIdentifierStart,
313/* 214 - Lu category */ CharacterIdentifierStart,
314/* 215 - Sm category */ CharacterInvalid,
315/* 216 - Lu category */ CharacterIdentifierStart,
316/* 217 - Lu category */ CharacterIdentifierStart,
317/* 218 - Lu category */ CharacterIdentifierStart,
318/* 219 - Lu category */ CharacterIdentifierStart,
319/* 220 - Lu category */ CharacterIdentifierStart,
320/* 221 - Lu category */ CharacterIdentifierStart,
321/* 222 - Lu category */ CharacterIdentifierStart,
322/* 223 - Ll category */ CharacterIdentifierStart,
323/* 224 - Ll category */ CharacterIdentifierStart,
324/* 225 - Ll category */ CharacterIdentifierStart,
325/* 226 - Ll category */ CharacterIdentifierStart,
326/* 227 - Ll category */ CharacterIdentifierStart,
327/* 228 - Ll category */ CharacterIdentifierStart,
328/* 229 - Ll category */ CharacterIdentifierStart,
329/* 230 - Ll category */ CharacterIdentifierStart,
330/* 231 - Ll category */ CharacterIdentifierStart,
331/* 232 - Ll category */ CharacterIdentifierStart,
332/* 233 - Ll category */ CharacterIdentifierStart,
333/* 234 - Ll category */ CharacterIdentifierStart,
334/* 235 - Ll category */ CharacterIdentifierStart,
335/* 236 - Ll category */ CharacterIdentifierStart,
336/* 237 - Ll category */ CharacterIdentifierStart,
337/* 238 - Ll category */ CharacterIdentifierStart,
338/* 239 - Ll category */ CharacterIdentifierStart,
339/* 240 - Ll category */ CharacterIdentifierStart,
340/* 241 - Ll category */ CharacterIdentifierStart,
341/* 242 - Ll category */ CharacterIdentifierStart,
342/* 243 - Ll category */ CharacterIdentifierStart,
343/* 244 - Ll category */ CharacterIdentifierStart,
344/* 245 - Ll category */ CharacterIdentifierStart,
345/* 246 - Ll category */ CharacterIdentifierStart,
346/* 247 - Sm category */ CharacterInvalid,
347/* 248 - Ll category */ CharacterIdentifierStart,
348/* 249 - Ll category */ CharacterIdentifierStart,
349/* 250 - Ll category */ CharacterIdentifierStart,
350/* 251 - Ll category */ CharacterIdentifierStart,
351/* 252 - Ll category */ CharacterIdentifierStart,
352/* 253 - Ll category */ CharacterIdentifierStart,
353/* 254 - Ll category */ CharacterIdentifierStart,
354/* 255 - Ll category */ CharacterIdentifierStart
355};
356
357// This table provides the character that results from \X where X is the index in the table beginning
358// with SPACE. A table value of 0 means that more processing needs to be done.
359static constexpr const LChar singleCharacterEscapeValuesForASCII[128] = {
360/* 0 - Null */ 0,
361/* 1 - Start of Heading */ 0,
362/* 2 - Start of Text */ 0,
363/* 3 - End of Text */ 0,
364/* 4 - End of Transm. */ 0,
365/* 5 - Enquiry */ 0,
366/* 6 - Acknowledgment */ 0,
367/* 7 - Bell */ 0,
368/* 8 - Back Space */ 0,
369/* 9 - Horizontal Tab */ 0,
370/* 10 - Line Feed */ 0,
371/* 11 - Vertical Tab */ 0,
372/* 12 - Form Feed */ 0,
373/* 13 - Carriage Return */ 0,
374/* 14 - Shift Out */ 0,
375/* 15 - Shift In */ 0,
376/* 16 - Data Line Escape */ 0,
377/* 17 - Device Control 1 */ 0,
378/* 18 - Device Control 2 */ 0,
379/* 19 - Device Control 3 */ 0,
380/* 20 - Device Control 4 */ 0,
381/* 21 - Negative Ack. */ 0,
382/* 22 - Synchronous Idle */ 0,
383/* 23 - End of Transmit */ 0,
384/* 24 - Cancel */ 0,
385/* 25 - End of Medium */ 0,
386/* 26 - Substitute */ 0,
387/* 27 - Escape */ 0,
388/* 28 - File Separator */ 0,
389/* 29 - Group Separator */ 0,
390/* 30 - Record Separator */ 0,
391/* 31 - Unit Separator */ 0,
392/* 32 - Space */ ' ',
393/* 33 - ! */ '!',
394/* 34 - " */ '"',
395/* 35 - # */ '#',
396/* 36 - $ */ '$',
397/* 37 - % */ '%',
398/* 38 - & */ '&',
399/* 39 - ' */ '\'',
400/* 40 - ( */ '(',
401/* 41 - ) */ ')',
402/* 42 - * */ '*',
403/* 43 - + */ '+',
404/* 44 - , */ ',',
405/* 45 - - */ '-',
406/* 46 - . */ '.',
407/* 47 - / */ '/',
408/* 48 - 0 */ 0,
409/* 49 - 1 */ 0,
410/* 50 - 2 */ 0,
411/* 51 - 3 */ 0,
412/* 52 - 4 */ 0,
413/* 53 - 5 */ 0,
414/* 54 - 6 */ 0,
415/* 55 - 7 */ 0,
416/* 56 - 8 */ 0,
417/* 57 - 9 */ 0,
418/* 58 - : */ ':',
419/* 59 - ; */ ';',
420/* 60 - < */ '<',
421/* 61 - = */ '=',
422/* 62 - > */ '>',
423/* 63 - ? */ '?',
424/* 64 - @ */ '@',
425/* 65 - A */ 'A',
426/* 66 - B */ 'B',
427/* 67 - C */ 'C',
428/* 68 - D */ 'D',
429/* 69 - E */ 'E',
430/* 70 - F */ 'F',
431/* 71 - G */ 'G',
432/* 72 - H */ 'H',
433/* 73 - I */ 'I',
434/* 74 - J */ 'J',
435/* 75 - K */ 'K',
436/* 76 - L */ 'L',
437/* 77 - M */ 'M',
438/* 78 - N */ 'N',
439/* 79 - O */ 'O',
440/* 80 - P */ 'P',
441/* 81 - Q */ 'Q',
442/* 82 - R */ 'R',
443/* 83 - S */ 'S',
444/* 84 - T */ 'T',
445/* 85 - U */ 'U',
446/* 86 - V */ 'V',
447/* 87 - W */ 'W',
448/* 88 - X */ 'X',
449/* 89 - Y */ 'Y',
450/* 90 - Z */ 'Z',
451/* 91 - [ */ '[',
452/* 92 - \ */ '\\',
453/* 93 - ] */ ']',
454/* 94 - ^ */ '^',
455/* 95 - _ */ '_',
456/* 96 - ` */ '`',
457/* 97 - a */ 'a',
458/* 98 - b */ 0x08,
459/* 99 - c */ 'c',
460/* 100 - d */ 'd',
461/* 101 - e */ 'e',
462/* 102 - f */ 0x0C,
463/* 103 - g */ 'g',
464/* 104 - h */ 'h',
465/* 105 - i */ 'i',
466/* 106 - j */ 'j',
467/* 107 - k */ 'k',
468/* 108 - l */ 'l',
469/* 109 - m */ 'm',
470/* 110 - n */ 0x0A,
471/* 111 - o */ 'o',
472/* 112 - p */ 'p',
473/* 113 - q */ 'q',
474/* 114 - r */ 0x0D,
475/* 115 - s */ 's',
476/* 116 - t */ 0x09,
477/* 117 - u */ 0,
478/* 118 - v */ 0x0B,
479/* 119 - w */ 'w',
480/* 120 - x */ 0,
481/* 121 - y */ 'y',
482/* 122 - z */ 'z',
483/* 123 - { */ '{',
484/* 124 - | */ '|',
485/* 125 - } */ '}',
486/* 126 - ~ */ '~',
487/* 127 - Delete */ 0
488};
489
490template <typename T>
491Lexer<T>::Lexer(VM& vm, JSParserBuiltinMode builtinMode, JSParserScriptMode scriptMode)
492 : m_positionBeforeLastNewline(0,0,0)
493 , m_isReparsingFunction(false)
494 , m_vm(vm)
495 , m_parsingBuiltinFunction(builtinMode == JSParserBuiltinMode::Builtin)
496 , m_scriptMode(scriptMode)
497{
498}
499
500static inline JSTokenType tokenTypeForIntegerLikeToken(double doubleValue)
501{
502 if ((doubleValue || !std::signbit(doubleValue)) && static_cast<int64_t>(doubleValue) == doubleValue)
503 return INTEGER;
504 return DOUBLE;
505}
506
507template <typename T>
508Lexer<T>::~Lexer()
509{
510}
511
512template <typename T>
513String Lexer<T>::invalidCharacterMessage() const
514{
515 switch (m_current) {
516 case 0:
517 return "Invalid character: '\\0'"_s;
518 case 10:
519 return "Invalid character: '\\n'"_s;
520 case 11:
521 return "Invalid character: '\\v'"_s;
522 case 13:
523 return "Invalid character: '\\r'"_s;
524 case 35:
525 return "Invalid character: '#'"_s;
526 case 64:
527 return "Invalid character: '@'"_s;
528 case 96:
529 return "Invalid character: '`'"_s;
530 default:
531 return makeString("Invalid character '\\u", hex(m_current, 4, Lowercase), '\'');
532 }
533}
534
535template <typename T>
536ALWAYS_INLINE const T* Lexer<T>::currentSourcePtr() const
537{
538 ASSERT(m_code <= m_codeEnd);
539 return m_code;
540}
541
542template <typename T>
543void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
544{
545 m_arena = &arena->identifierArena();
546
547 m_lineNumber = source.firstLine().oneBasedInt();
548 m_lastToken = -1;
549
550 StringView sourceString = source.provider()->source();
551
552 if (!sourceString.isNull())
553 setCodeStart(sourceString);
554 else
555 m_codeStart = nullptr;
556
557 m_source = &source;
558 m_sourceOffset = source.startOffset();
559 m_codeStartPlusOffset = m_codeStart + source.startOffset();
560 m_code = m_codeStartPlusOffset;
561 m_codeEnd = m_codeStart + source.endOffset();
562 m_error = false;
563 m_atLineStart = true;
564 m_lineStart = m_code;
565 m_lexErrorMessage = String();
566 m_sourceURLDirective = String();
567 m_sourceMappingURLDirective = String();
568
569 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
570 m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
571 m_bufferForRawTemplateString16.reserveInitialCapacity(initialReadBufferCapacity);
572
573 if (LIKELY(m_code < m_codeEnd))
574 m_current = *m_code;
575 else
576 m_current = 0;
577 ASSERT(currentOffset() == source.startOffset());
578}
579
580template <typename T>
581template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
582{
583 m_code += shiftAmount;
584 ASSERT(currentOffset() >= currentLineStartOffset());
585 m_current = *m_code;
586}
587
588template <typename T>
589ALWAYS_INLINE void Lexer<T>::shift()
590{
591 // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
592 m_current = 0;
593 ++m_code;
594 if (LIKELY(m_code < m_codeEnd))
595 m_current = *m_code;
596}
597
598template <typename T>
599ALWAYS_INLINE bool Lexer<T>::atEnd() const
600{
601 ASSERT(!m_current || m_code < m_codeEnd);
602 return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
603}
604
605template <typename T>
606ALWAYS_INLINE T Lexer<T>::peek(int offset) const
607{
608 ASSERT(offset > 0 && offset < 5);
609 const T* code = m_code + offset;
610 return (code < m_codeEnd) ? *code : 0;
611}
612
613struct ParsedUnicodeEscapeValue {
614 ParsedUnicodeEscapeValue(UChar32 value)
615 : m_value(value)
616 {
617 ASSERT(isValid());
618 }
619
620 enum SpecialValueType { Incomplete = -2, Invalid = -1 };
621 ParsedUnicodeEscapeValue(SpecialValueType type)
622 : m_value(type)
623 {
624 }
625
626 bool isValid() const { return m_value >= 0; }
627 bool isIncomplete() const { return m_value == Incomplete; }
628
629 UChar32 value() const
630 {
631 ASSERT(isValid());
632 return m_value;
633 }
634
635private:
636 UChar32 m_value;
637};
638
639template<typename CharacterType>
640ParsedUnicodeEscapeValue Lexer<CharacterType>::parseUnicodeEscape()
641{
642 if (m_current == '{') {
643 shift();
644 UChar32 codePoint = 0;
645 do {
646 if (!isASCIIHexDigit(m_current))
647 return m_current ? ParsedUnicodeEscapeValue::Invalid : ParsedUnicodeEscapeValue::Incomplete;
648 codePoint = (codePoint << 4) | toASCIIHexValue(m_current);
649 if (codePoint > UCHAR_MAX_VALUE) {
650 // For raw template literal syntax, we consume `NotEscapeSequence`.
651 // Here, we consume NotCodePoint's HexDigits.
652 //
653 // NotEscapeSequence ::
654 // u { [lookahread not one of HexDigit]
655 // u { NotCodePoint
656 // u { CodePoint [lookahead != }]
657 //
658 // NotCodePoint ::
659 // HexDigits but not if MV of HexDigits <= 0x10FFFF
660 //
661 // CodePoint ::
662 // HexDigits but not if MV of HexDigits > 0x10FFFF
663 shift();
664 while (isASCIIHexDigit(m_current))
665 shift();
666
667 return atEnd() ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
668 }
669 shift();
670 } while (m_current != '}');
671 shift();
672 return codePoint;
673 }
674
675 auto character2 = peek(1);
676 auto character3 = peek(2);
677 auto character4 = peek(3);
678 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(character2) || !isASCIIHexDigit(character3) || !isASCIIHexDigit(character4))) {
679 auto result = (m_code + 4) >= m_codeEnd ? ParsedUnicodeEscapeValue::Incomplete : ParsedUnicodeEscapeValue::Invalid;
680
681 // For raw template literal syntax, we consume `NotEscapeSequence`.
682 //
683 // NotEscapeSequence ::
684 // u [lookahead not one of HexDigit][lookahead != {]
685 // u HexDigit [lookahead not one of HexDigit]
686 // u HexDigit HexDigit [lookahead not one of HexDigit]
687 // u HexDigit HexDigit HexDigit [lookahead not one of HexDigit]
688 while (isASCIIHexDigit(m_current))
689 shift();
690
691 return result;
692 }
693
694 auto result = convertUnicode(m_current, character2, character3, character4);
695 shift();
696 shift();
697 shift();
698 shift();
699 return result;
700}
701
702template <typename T>
703void Lexer<T>::shiftLineTerminator()
704{
705 ASSERT(isLineTerminator(m_current));
706
707 m_positionBeforeLastNewline = currentPosition();
708 T prev = m_current;
709 shift();
710
711 if (prev == '\r' && m_current == '\n')
712 shift();
713
714 ++m_lineNumber;
715 m_lineStart = m_code;
716}
717
718template <typename T>
719ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
720{
721 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
722}
723
724template <typename T>
725ALWAYS_INLINE void Lexer<T>::skipWhitespace()
726{
727 while (isWhiteSpace(m_current))
728 shift();
729}
730
731static bool isNonLatin1IdentStart(UChar32 c)
732{
733 return u_hasBinaryProperty(c, UCHAR_ID_START);
734}
735
736template<typename CharacterType>
737static ALWAYS_INLINE bool isIdentStart(CharacterType c)
738{
739 static_assert(std::is_same_v<CharacterType, LChar> || std::is_same_v<CharacterType, UChar32>, "Call isSingleCharacterIdentStart for UChars that don't need to check for surrogate pairs");
740 if (!isLatin1(c))
741 return isNonLatin1IdentStart(c);
742 return typesOfLatin1Characters[static_cast<LChar>(c)] == CharacterIdentifierStart;
743}
744
745static ALWAYS_INLINE UNUSED_FUNCTION bool isSingleCharacterIdentStart(UChar c)
746{
747 if (LIKELY(isLatin1(c)))
748 return isIdentStart(static_cast<LChar>(c));
749 return !U16_IS_SURROGATE(c) && isIdentStart(static_cast<UChar32>(c));
750}
751
752static ALWAYS_INLINE bool cannotBeIdentStart(LChar c)
753{
754 return !isIdentStart(c) && c != '\\';
755}
756
757static ALWAYS_INLINE bool cannotBeIdentStart(UChar c)
758{
759 if (LIKELY(isLatin1(c)))
760 return cannotBeIdentStart(static_cast<LChar>(c));
761 return Lexer<UChar>::isWhiteSpace(c) || Lexer<UChar>::isLineTerminator(c);
762}
763
764static NEVER_INLINE bool isNonLatin1IdentPart(UChar32 c)
765{
766 return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE) || c == 0x200C || c == 0x200D;
767}
768
769template<typename CharacterType>
770static ALWAYS_INLINE bool isIdentPart(CharacterType c)
771{
772 static_assert(std::is_same_v<CharacterType, LChar> || std::is_same_v<CharacterType, UChar32>, "Call isSingleCharacterIdentPart for UChars that don't need to check for surrogate pairs");
773 if (!isLatin1(c))
774 return isNonLatin1IdentPart(c);
775
776 // Character types are divided into two groups depending on whether they can be part of an
777 // identifier or not. Those whose type value is less or equal than CharacterOtherIdentifierPart can be
778 // part of an identifier. (See the CharacterType definition for more details.)
779 return typesOfLatin1Characters[static_cast<LChar>(c)] <= CharacterOtherIdentifierPart;
780}
781
782static ALWAYS_INLINE bool isSingleCharacterIdentPart(UChar c)
783{
784 if (LIKELY(isLatin1(c)))
785 return isIdentPart(static_cast<LChar>(c));
786 return !U16_IS_SURROGATE(c) && isIdentPart(static_cast<UChar32>(c));
787}
788
789static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(LChar c)
790{
791 return !isIdentPart(c) && c != '\\';
792}
793
794// NOTE: This may give give false negatives (for non-ascii) but won't give false posititves.
795// This means it can be used to detect the end of a keyword (all keywords are ascii)
796static ALWAYS_INLINE bool cannotBeIdentPartOrEscapeStart(UChar c)
797{
798 if (LIKELY(isLatin1(c)))
799 return cannotBeIdentPartOrEscapeStart(static_cast<LChar>(c));
800 return Lexer<UChar>::isWhiteSpace(c) || Lexer<UChar>::isLineTerminator(c);
801}
802
803
804template<>
805ALWAYS_INLINE UChar32 Lexer<LChar>::currentCodePoint() const
806{
807 return m_current;
808}
809
810template<>
811ALWAYS_INLINE UChar32 Lexer<UChar>::currentCodePoint() const
812{
813 ASSERT_WITH_MESSAGE(!isIdentStart(static_cast<UChar32>(U_SENTINEL)), "error values shouldn't appear as a valid identifier start code point");
814 if (!U16_IS_SURROGATE(m_current))
815 return m_current;
816
817 UChar trail = peek(1);
818 if (UNLIKELY(!U16_IS_LEAD(m_current) || !U16_IS_SURROGATE_TRAIL(trail)))
819 return U_SENTINEL;
820
821 UChar32 codePoint = U16_GET_SUPPLEMENTARY(m_current, trail);
822 return codePoint;
823}
824
// Accepts a decimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIDigit(character);
}
830
// Accepts a hexadecimal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIHexDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIHexDigit(character);
}
836
// Accepts a binary digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIBinaryDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIBinaryDigit(character);
}
842
// Accepts an octal digit or the '_' separator.
template<typename CharacterType>
static inline bool isASCIIOctalDigitOrSeparator(CharacterType character)
{
    if (character == '_')
        return true;
    return isASCIIOctalDigit(character);
}
848
849static inline LChar singleEscape(int c)
850{
851 if (c < 128) {
852 ASSERT(static_cast<size_t>(c) < WTF_ARRAY_LENGTH(singleCharacterEscapeValuesForASCII));
853 return singleCharacterEscapeValuesForASCII[c];
854 }
855 return 0;
856}
857
858template <typename T>
859inline void Lexer<T>::record8(int c)
860{
861 ASSERT(isLatin1(c));
862 m_buffer8.append(static_cast<LChar>(c));
863}
864
865template <typename T>
866inline void Lexer<T>::append8(const T* p, size_t length)
867{
868 size_t currentSize = m_buffer8.size();
869 m_buffer8.grow(currentSize + length);
870 LChar* rawBuffer = m_buffer8.data() + currentSize;
871
872 for (size_t i = 0; i < length; i++) {
873 T c = p[i];
874 ASSERT(isLatin1(c));
875 rawBuffer[i] = c;
876 }
877}
878
879template <typename T>
880inline void Lexer<T>::append16(const LChar* p, size_t length)
881{
882 size_t currentSize = m_buffer16.size();
883 m_buffer16.grow(currentSize + length);
884 UChar* rawBuffer = m_buffer16.data() + currentSize;
885
886 for (size_t i = 0; i < length; i++)
887 rawBuffer[i] = p[i];
888}
889
890template <typename T>
891inline void Lexer<T>::record16(T c)
892{
893 m_buffer16.append(c);
894}
895
896template <typename T>
897inline void Lexer<T>::record16(int c)
898{
899 ASSERT(c >= 0);
900 ASSERT(c <= static_cast<int>(USHRT_MAX));
901 m_buffer16.append(static_cast<UChar>(c));
902}
903
904template<typename CharacterType> inline void Lexer<CharacterType>::recordUnicodeCodePoint(UChar32 codePoint)
905{
906 ASSERT(codePoint >= 0);
907 ASSERT(codePoint <= UCHAR_MAX_VALUE);
908 if (U_IS_BMP(codePoint))
909 record16(codePoint);
910 else {
911 UChar codeUnits[2] = { U16_LEAD(codePoint), U16_TRAIL(codePoint) };
912 append16(codeUnits, 2);
913 }
914}
915
#if ASSERT_ENABLED
// Returns false for the identifiers that are blocked while lexing builtins: the public names of
// call and apply, eval, and Function. A null identifier is considered safe. This is intended to
// be used as a safety net while implementing builtins.
// FIXME: How can a debug-only assertion be a safety net?
bool isSafeBuiltinIdentifier(VM& vm, const Identifier* ident)
{
    if (!ident)
        return true;

    const bool isBlocked = *ident == vm.propertyNames->builtinNames().callPublicName()
        || *ident == vm.propertyNames->builtinNames().applyPublicName()
        || *ident == vm.propertyNames->eval
        || *ident == vm.propertyNames->Function;
    return !isBlocked;
}
#endif // ASSERT_ENABLED
936
937template <>
938template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
939{
940 tokenData->escaped = false;
941 const ptrdiff_t remaining = m_codeEnd - m_code;
942 if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
943 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
944 if (keyword != IDENT) {
945 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
946 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
947 }
948 }
949
950 bool isPrivateName = m_current == '#';
951 bool isBuiltinName = m_current == '@' && m_parsingBuiltinFunction;
952 bool isWellKnownSymbol = false;
953 if (isBuiltinName) {
954 ASSERT(m_parsingBuiltinFunction);
955 shift();
956 if (m_current == '@') {
957 isWellKnownSymbol = true;
958 shift();
959 }
960 }
961
962 const LChar* identifierStart = currentSourcePtr();
963
964 if (isPrivateName)
965 shift();
966
967 ASSERT(isIdentStart(m_current) || m_current == '\\');
968 while (isIdentPart(m_current))
969 shift();
970
971 if (UNLIKELY(m_current == '\\'))
972 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode, identifierStart);
973
974 const Identifier* ident = nullptr;
975
976 if (shouldCreateIdentifier || m_parsingBuiltinFunction) {
977 int identifierLength = currentSourcePtr() - identifierStart;
978 ident = makeIdentifier(identifierStart, identifierLength);
979 if (m_parsingBuiltinFunction && isBuiltinName) {
980 if (isWellKnownSymbol)
981 ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->builtinNames().lookUpWellKnownSymbol(identifierStart, identifierLength));
982 else
983 ident = &m_arena->makeIdentifier(m_vm, m_vm.propertyNames->builtinNames().lookUpPrivateName(identifierStart, identifierLength));
984 if (!ident)
985 return INVALID_PRIVATE_NAME_ERRORTOK;
986 } else {
987 ident = makeIdentifier(identifierStart, identifierLength);
988 if (m_parsingBuiltinFunction) {
989 if (!isSafeBuiltinIdentifier(m_vm, ident)) {
990 m_lexErrorMessage = makeString("The use of '", ident->string(), "' is disallowed in builtin functions.");
991 return ERRORTOK;
992 }
993 if (*ident == m_vm.propertyNames->undefinedKeyword)
994 tokenData->ident = &m_vm.propertyNames->undefinedPrivateName;
995 }
996 }
997 tokenData->ident = ident;
998 } else
999 tokenData->ident = nullptr;
1000
1001 auto identType = isPrivateName ? PRIVATENAME : IDENT;
1002 if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) && !isBuiltinName) {
1003 ASSERT(shouldCreateIdentifier);
1004 if (remaining < maxTokenLength) {
1005 const HashTableValue* entry = JSC::mainTable.entry(*ident);
1006 ASSERT((remaining < maxTokenLength) || !entry);
1007 if (!entry)
1008 return identType;
1009 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1010 return (token != RESERVED_IF_STRICT) || strictMode ? token : identType;
1011 }
1012 return identType;
1013 }
1014
1015 return identType;
1016}
1017
1018template <>
1019template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode)
1020{
1021 ASSERT(!m_parsingBuiltinFunction);
1022 tokenData->escaped = false;
1023 const ptrdiff_t remaining = m_codeEnd - m_code;
1024 if ((remaining >= maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords)) {
1025 JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
1026 if (keyword != IDENT) {
1027 ASSERT((!shouldCreateIdentifier) || tokenData->ident);
1028 return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
1029 }
1030 }
1031
1032 bool isPrivateName = m_current == '#';
1033 const UChar* identifierStart = currentSourcePtr();
1034
1035 if (isPrivateName)
1036 shift();
1037
1038 UChar orAllChars = 0;
1039 ASSERT(isSingleCharacterIdentStart(m_current) || U16_IS_SURROGATE(m_current) || m_current == '\\');
1040 while (isSingleCharacterIdentPart(m_current)) {
1041 orAllChars |= m_current;
1042 shift();
1043 }
1044
1045 if (UNLIKELY(U16_IS_SURROGATE(m_current) || m_current == '\\'))
1046 return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode, identifierStart);
1047
1048 bool isAll8Bit = !(orAllChars & ~0xff);
1049 const Identifier* ident = nullptr;
1050
1051 if (shouldCreateIdentifier) {
1052 int identifierLength = currentSourcePtr() - identifierStart;
1053 if (isAll8Bit)
1054 ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
1055 else
1056 ident = makeIdentifier(identifierStart, identifierLength);
1057 tokenData->ident = ident;
1058 } else
1059 tokenData->ident = nullptr;
1060
1061 if (isPrivateName)
1062 return PRIVATENAME;
1063
1064 if (UNLIKELY((remaining < maxTokenLength) && !lexerFlags.contains(LexerFlags::IgnoreReservedWords))) {
1065 ASSERT(shouldCreateIdentifier);
1066 if (remaining < maxTokenLength) {
1067 const HashTableValue* entry = JSC::mainTable.entry(*ident);
1068 ASSERT((remaining < maxTokenLength) || !entry);
1069 if (!entry)
1070 return IDENT;
1071 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1072 return (token != RESERVED_IF_STRICT) || strictMode ? token : IDENT;
1073 }
1074 return IDENT;
1075 }
1076
1077 return IDENT;
1078}
1079
1080template<typename CharacterType>
1081template<bool shouldCreateIdentifier>
1082JSTokenType Lexer<CharacterType>::parseIdentifierSlowCase(JSTokenData* tokenData, OptionSet<LexerFlags> lexerFlags, bool strictMode, const CharacterType* identifierStart)
1083{
1084 ASSERT(U16_IS_SURROGATE(m_current) || m_current == '\\');
1085 ASSERT(m_buffer16.isEmpty());
1086 ASSERT(!tokenData->escaped);
1087
1088 auto identCharsStart = identifierStart;
1089 bool isPrivateName = *identifierStart == '#';
1090 if (isPrivateName)
1091 ++identCharsStart;
1092
1093 JSTokenType identType = isPrivateName ? PRIVATENAME : IDENT;
1094 ASSERT(!isPrivateName || identifierStart != currentSourcePtr());
1095
1096 auto fillBuffer = [&] (bool isStart = false) {
1097 // \uXXXX unicode characters or Surrogate pairs.
1098 if (identifierStart != currentSourcePtr())
1099 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1100
1101 if (m_current == '\\') {
1102 tokenData->escaped = true;
1103 shift();
1104 if (UNLIKELY(m_current != 'u'))
1105 return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_ESCAPE_ERRORTOK;
1106 shift();
1107 auto character = parseUnicodeEscape();
1108 if (UNLIKELY(!character.isValid()))
1109 return character.isIncomplete() ? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK : INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1110 if (UNLIKELY(isStart ? !isIdentStart(character.value()) : !isIdentPart(character.value())))
1111 return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK;
1112 if (shouldCreateIdentifier)
1113 recordUnicodeCodePoint(character.value());
1114 identifierStart = currentSourcePtr();
1115 return identType;
1116 }
1117
1118 ASSERT(U16_IS_SURROGATE(m_current));
1119 if (UNLIKELY(!U16_IS_SURROGATE_LEAD(m_current)))
1120 return INVALID_UNICODE_ENCODING_ERRORTOK;
1121
1122 UChar32 codePoint = currentCodePoint();
1123 if (UNLIKELY(codePoint == U_SENTINEL))
1124 return INVALID_UNICODE_ENCODING_ERRORTOK;
1125 if (UNLIKELY(isStart ? !isNonLatin1IdentStart(codePoint) : !isNonLatin1IdentPart(codePoint)))
1126 return INVALID_IDENTIFIER_UNICODE_ERRORTOK;
1127 append16(m_code, 2);
1128 shift();
1129 shift();
1130 identifierStart = currentSourcePtr();
1131 return identType;
1132 };
1133
1134 JSTokenType type = fillBuffer(identCharsStart == currentSourcePtr());
1135 if (UNLIKELY(type & CanBeErrorTokenFlag))
1136 return type;
1137
1138 while (true) {
1139 if (LIKELY(isSingleCharacterIdentPart(m_current))) {
1140 shift();
1141 continue;
1142 }
1143 if (!U16_IS_SURROGATE(m_current) && m_current != '\\')
1144 break;
1145
1146 type = fillBuffer();
1147 if (UNLIKELY(type & CanBeErrorTokenFlag))
1148 return type;
1149 }
1150
1151 const Identifier* ident = nullptr;
1152 if (shouldCreateIdentifier) {
1153 if (identifierStart != currentSourcePtr())
1154 m_buffer16.append(identifierStart, currentSourcePtr() - identifierStart);
1155 ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1156
1157 tokenData->ident = ident;
1158 } else
1159 tokenData->ident = nullptr;
1160
1161 m_buffer16.shrink(0);
1162
1163 if (LIKELY(!lexerFlags.contains(LexerFlags::IgnoreReservedWords))) {
1164 ASSERT(shouldCreateIdentifier);
1165 const HashTableValue* entry = JSC::mainTable.entry(*ident);
1166 if (!entry)
1167 return identType;
1168 JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
1169 if ((token != RESERVED_IF_STRICT) || strictMode)
1170 return ESCAPED_KEYWORD;
1171 }
1172
1173 return identType;
1174}
1175
1176static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
1177{
1178 return character < 0xE;
1179}
1180
1181static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
1182{
1183 return character < 0xE || !isLatin1(character);
1184}
1185
1186template <typename T>
1187template <bool shouldBuildStrings> ALWAYS_INLINE typename Lexer<T>::StringParseResult Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
1188{
1189 int startingOffset = currentOffset();
1190 int startingLineStartOffset = currentLineStartOffset();
1191 int startingLineNumber = lineNumber();
1192 T stringQuoteCharacter = m_current;
1193 shift();
1194
1195 const T* stringStart = currentSourcePtr();
1196
1197 while (m_current != stringQuoteCharacter) {
1198 if (UNLIKELY(m_current == '\\')) {
1199 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1200 append8(stringStart, currentSourcePtr() - stringStart);
1201 shift();
1202
1203 LChar escape = singleEscape(m_current);
1204
1205 // Most common escape sequences first.
1206 if (escape) {
1207 if (shouldBuildStrings)
1208 record8(escape);
1209 shift();
1210 } else if (UNLIKELY(isLineTerminator(m_current)))
1211 shiftLineTerminator();
1212 else if (m_current == 'x') {
1213 shift();
1214 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1215 m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1216 return (atEnd() || (isASCIIHexDigit(m_current) && (m_code + 1 == m_codeEnd))) ? StringUnterminated : StringCannotBeParsed;
1217 }
1218 T prev = m_current;
1219 shift();
1220 if (shouldBuildStrings)
1221 record8(convertHex(prev, m_current));
1222 shift();
1223 } else {
1224 setOffset(startingOffset, startingLineStartOffset);
1225 setLineNumber(startingLineNumber);
1226 m_buffer8.shrink(0);
1227 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1228 }
1229 stringStart = currentSourcePtr();
1230 continue;
1231 }
1232
1233 if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
1234 setOffset(startingOffset, startingLineStartOffset);
1235 setLineNumber(startingLineNumber);
1236 m_buffer8.shrink(0);
1237 return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
1238 }
1239
1240 shift();
1241 }
1242
1243 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1244 append8(stringStart, currentSourcePtr() - stringStart);
1245 if (shouldBuildStrings) {
1246 tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
1247 m_buffer8.shrink(0);
1248 } else
1249 tokenData->ident = nullptr;
1250
1251 return StringParsedSuccessfully;
1252}
1253
1254template <typename T>
1255template <bool shouldBuildStrings> ALWAYS_INLINE auto Lexer<T>::parseComplexEscape(bool strictMode) -> StringParseResult
1256{
1257 if (m_current == 'x') {
1258 shift();
1259 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(peek(1))) {
1260 // For raw template literal syntax, we consume `NotEscapeSequence`.
1261 //
1262 // NotEscapeSequence ::
1263 // x [lookahread not one of HexDigit]
1264 // x HexDigit [lookahread not one of HexDigit]
1265 if (isASCIIHexDigit(m_current))
1266 shift();
1267 ASSERT(!isASCIIHexDigit(m_current));
1268
1269 m_lexErrorMessage = "\\x can only be followed by a hex character sequence"_s;
1270 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1271 }
1272
1273 T prev = m_current;
1274 shift();
1275 if (shouldBuildStrings)
1276 record16(convertHex(prev, m_current));
1277 shift();
1278
1279 return StringParsedSuccessfully;
1280 }
1281
1282 if (m_current == 'u') {
1283 shift();
1284
1285 auto character = parseUnicodeEscape();
1286 if (character.isValid()) {
1287 if (shouldBuildStrings)
1288 recordUnicodeCodePoint(character.value());
1289 return StringParsedSuccessfully;
1290 }
1291
1292 m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence"_s;
1293 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1294 }
1295
1296 if (strictMode) {
1297 if (isASCIIDigit(m_current)) {
1298 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
1299 int character1 = m_current;
1300 shift();
1301 if (character1 != '0' || isASCIIDigit(m_current)) {
1302 // For raw template literal syntax, we consume `NotEscapeSequence`.
1303 //
1304 // NotEscapeSequence ::
1305 // 0 DecimalDigit
1306 // DecimalDigit but not 0
1307 if (character1 == '0')
1308 shift();
1309
1310 m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'"_s;
1311 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1312 }
1313 if (shouldBuildStrings)
1314 record16(0);
1315 return StringParsedSuccessfully;
1316 }
1317 } else {
1318 if (isASCIIOctalDigit(m_current)) {
1319 // Octal character sequences
1320 T character1 = m_current;
1321 shift();
1322 if (isASCIIOctalDigit(m_current)) {
1323 // Two octal characters
1324 T character2 = m_current;
1325 shift();
1326 if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
1327 if (shouldBuildStrings)
1328 record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
1329 shift();
1330 } else {
1331 if (shouldBuildStrings)
1332 record16((character1 - '0') * 8 + character2 - '0');
1333 }
1334 } else {
1335 if (shouldBuildStrings)
1336 record16(character1 - '0');
1337 }
1338 return StringParsedSuccessfully;
1339 }
1340 }
1341
1342 if (!atEnd()) {
1343 if (shouldBuildStrings)
1344 record16(m_current);
1345 shift();
1346 return StringParsedSuccessfully;
1347 }
1348
1349 m_lexErrorMessage = "Unterminated string constant"_s;
1350 return StringUnterminated;
1351}
1352
1353template <typename T>
1354template <bool shouldBuildStrings> auto Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode) -> StringParseResult
1355{
1356 T stringQuoteCharacter = m_current;
1357 shift();
1358
1359 const T* stringStart = currentSourcePtr();
1360
1361 while (m_current != stringQuoteCharacter) {
1362 if (UNLIKELY(m_current == '\\')) {
1363 if (stringStart != currentSourcePtr() && shouldBuildStrings)
1364 append16(stringStart, currentSourcePtr() - stringStart);
1365 shift();
1366
1367 LChar escape = singleEscape(m_current);
1368
1369 // Most common escape sequences first
1370 if (escape) {
1371 if (shouldBuildStrings)
1372 record16(escape);
1373 shift();
1374 } else if (UNLIKELY(isLineTerminator(m_current)))
1375 shiftLineTerminator();
1376 else {
1377 StringParseResult result = parseComplexEscape<shouldBuildStrings>(strictMode);
1378 if (result != StringParsedSuccessfully)
1379 return result;
1380 }
1381
1382 stringStart = currentSourcePtr();
1383 continue;
1384 }
1385 // Fast check for characters that require special handling.
1386 // Catches 0, \n, and \r as efficiently as possible, and lets through all common ASCII characters.
1387 static_assert(std::is_unsigned<T>::value, "Lexer expects an unsigned character type");
1388 if (UNLIKELY(m_current < 0xE)) {
1389 // New-line or end of input is not allowed
1390 if (atEnd() || m_current == '\r' || m_current == '\n') {
1391 m_lexErrorMessage = "Unexpected EOF"_s;
1392 return atEnd() ? StringUnterminated : StringCannotBeParsed;
1393 }
1394 // Anything else is just a normal character
1395 }
1396 shift();
1397 }
1398
1399 if (currentSourcePtr() != stringStart && shouldBuildStrings)
1400 append16(stringStart, currentSourcePtr() - stringStart);
1401 if (shouldBuildStrings)
1402 tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1403 else
1404 tokenData->ident = nullptr;
1405
1406 m_buffer16.shrink(0);
1407 return StringParsedSuccessfully;
1408}
1409
1410template <typename T>
1411typename Lexer<T>::StringParseResult Lexer<T>::parseTemplateLiteral(JSTokenData* tokenData, RawStringsBuildMode rawStringsBuildMode)
1412{
1413 bool parseCookedFailed = false;
1414 const T* stringStart = currentSourcePtr();
1415 const T* rawStringStart = currentSourcePtr();
1416
1417 while (m_current != '`') {
1418 if (UNLIKELY(m_current == '\\')) {
1419 if (stringStart != currentSourcePtr())
1420 append16(stringStart, currentSourcePtr() - stringStart);
1421 shift();
1422
1423 LChar escape = singleEscape(m_current);
1424
1425 // Most common escape sequences first.
1426 if (escape) {
1427 record16(escape);
1428 shift();
1429 } else if (UNLIKELY(isLineTerminator(m_current))) {
1430 // Normalize <CR>, <CR><LF> to <LF>.
1431 if (m_current == '\r') {
1432 ASSERT_WITH_MESSAGE(rawStringStart != currentSourcePtr(), "We should have at least shifted the escape.");
1433
1434 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings) {
1435 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1436 m_bufferForRawTemplateString16.append('\n');
1437 }
1438
1439 shiftLineTerminator();
1440 rawStringStart = currentSourcePtr();
1441 } else
1442 shiftLineTerminator();
1443 } else {
1444 bool strictMode = true;
1445 StringParseResult result = parseComplexEscape<true>(strictMode);
1446 if (result != StringParsedSuccessfully) {
1447 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings && result == StringCannotBeParsed)
1448 parseCookedFailed = true;
1449 else
1450 return result;
1451 }
1452 }
1453
1454 stringStart = currentSourcePtr();
1455 continue;
1456 }
1457
1458 if (m_current == '$' && peek(1) == '{')
1459 break;
1460
1461 // Fast check for characters that require special handling.
1462 // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
1463 // as possible, and lets through all common ASCII characters.
1464 if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
1465 // End of input is not allowed.
1466 // Unlike String, line terminator is allowed.
1467 if (atEnd()) {
1468 m_lexErrorMessage = "Unexpected EOF"_s;
1469 return StringUnterminated;
1470 }
1471
1472 if (isLineTerminator(m_current)) {
1473 if (m_current == '\r') {
1474 // Normalize <CR>, <CR><LF> to <LF>.
1475 if (stringStart != currentSourcePtr())
1476 append16(stringStart, currentSourcePtr() - stringStart);
1477 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1478 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1479
1480 record16('\n');
1481 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1482 m_bufferForRawTemplateString16.append('\n');
1483 shiftLineTerminator();
1484 stringStart = currentSourcePtr();
1485 rawStringStart = currentSourcePtr();
1486 } else
1487 shiftLineTerminator();
1488 continue;
1489 }
1490 // Anything else is just a normal character
1491 }
1492
1493 shift();
1494 }
1495
1496 bool isTail = m_current == '`';
1497
1498 if (currentSourcePtr() != stringStart)
1499 append16(stringStart, currentSourcePtr() - stringStart);
1500 if (rawStringStart != currentSourcePtr() && rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1501 m_bufferForRawTemplateString16.append(rawStringStart, currentSourcePtr() - rawStringStart);
1502
1503 if (!parseCookedFailed)
1504 tokenData->cooked = makeIdentifier(m_buffer16.data(), m_buffer16.size());
1505 else
1506 tokenData->cooked = nullptr;
1507
1508 // Line terminator normalization (e.g. <CR> => <LF>) should be applied to both the raw and cooked representations.
1509 if (rawStringsBuildMode == RawStringsBuildMode::BuildRawStrings)
1510 tokenData->raw = makeIdentifier(m_bufferForRawTemplateString16.data(), m_bufferForRawTemplateString16.size());
1511 else
1512 tokenData->raw = nullptr;
1513
1514 tokenData->isTail = isTail;
1515
1516 m_buffer16.shrink(0);
1517 m_bufferForRawTemplateString16.shrink(0);
1518
1519 if (isTail) {
1520 // Skip `
1521 shift();
1522 } else {
1523 // Skip $ and {
1524 shift();
1525 shift();
1526 }
1527
1528 return StringParsedSuccessfully;
1529}
1530
1531template <typename T>
1532ALWAYS_INLINE auto Lexer<T>::parseHex() -> std::optional<NumberParseResult>
1533{
1534 ASSERT(isASCIIHexDigit(m_current));
1535
1536 // Optimization: most hexadecimal values fit into 4 bytes.
1537 uint32_t hexValue = 0;
1538 int maximumDigits = 7;
1539
1540 do {
1541 if (m_current == '_') {
1542 if (UNLIKELY(!isASCIIHexDigit(peek(1))))
1543 return std::nullopt;
1544
1545 shift();
1546 }
1547
1548 hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
1549 shift();
1550 --maximumDigits;
1551 } while (isASCIIHexDigitOrSeparator(m_current) && maximumDigits >= 0);
1552
1553 if (LIKELY(maximumDigits >= 0 && m_current != 'n'))
1554 return NumberParseResult { static_cast<double>(hexValue) };
1555
1556 // No more place in the hexValue buffer.
1557 // The values are shifted out and placed into the m_buffer8 vector.
1558 for (int i = 0; i < 8; ++i) {
1559 int digit = hexValue >> 28;
1560 if (digit < 10)
1561 record8(digit + '0');
1562 else
1563 record8(digit - 10 + 'a');
1564 hexValue <<= 4;
1565 }
1566
1567 while (isASCIIHexDigitOrSeparator(m_current)) {
1568 if (m_current == '_') {
1569 if (UNLIKELY(!isASCIIHexDigit(peek(1))))
1570 return std::nullopt;
1571
1572 shift();
1573 }
1574
1575 record8(m_current);
1576 shift();
1577 }
1578
1579 if (UNLIKELY(m_current == 'n'))
1580 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1581
1582 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16) };
1583}
1584
1585template <typename T>
1586ALWAYS_INLINE auto Lexer<T>::parseBinary() -> std::optional<NumberParseResult>
1587{
1588 ASSERT(isASCIIBinaryDigit(m_current));
1589
1590 // Optimization: most binary values fit into 4 bytes.
1591 uint32_t binaryValue = 0;
1592 const unsigned maximumDigits = 32;
1593 int digit = maximumDigits - 1;
1594 // Temporary buffer for the digits. Makes easier
1595 // to reconstruct the input characters when needed.
1596 LChar digits[maximumDigits];
1597
1598 do {
1599 if (m_current == '_') {
1600 if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
1601 return std::nullopt;
1602
1603 shift();
1604 }
1605
1606 binaryValue = (binaryValue << 1) + (m_current - '0');
1607 digits[digit] = m_current;
1608 shift();
1609 --digit;
1610 } while (isASCIIBinaryDigitOrSeparator(m_current) && digit >= 0);
1611
1612 if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
1613 return NumberParseResult { static_cast<double>(binaryValue) };
1614
1615 for (int i = maximumDigits - 1; i > digit; --i)
1616 record8(digits[i]);
1617
1618 while (isASCIIBinaryDigitOrSeparator(m_current)) {
1619 if (m_current == '_') {
1620 if (UNLIKELY(!isASCIIBinaryDigit(peek(1))))
1621 return std::nullopt;
1622
1623 shift();
1624 }
1625
1626 record8(m_current);
1627 shift();
1628 }
1629
1630 if (UNLIKELY(m_current == 'n'))
1631 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1632
1633 if (isASCIIDigit(m_current))
1634 return std::nullopt;
1635
1636 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 2) };
1637}
1638
1639template <typename T>
1640ALWAYS_INLINE auto Lexer<T>::parseOctal() -> std::optional<NumberParseResult>
1641{
1642 ASSERT(isASCIIOctalDigit(m_current));
1643 ASSERT(!m_buffer8.size() || (m_buffer8.size() == 1 && m_buffer8[0] == '0'));
1644 bool isLegacyLiteral = m_buffer8.size();
1645
1646 // Optimization: most octal values fit into 4 bytes.
1647 uint32_t octalValue = 0;
1648 const unsigned maximumDigits = 10;
1649 int digit = maximumDigits - 1;
1650 // Temporary buffer for the digits. Makes easier
1651 // to reconstruct the input characters when needed.
1652 LChar digits[maximumDigits];
1653
1654 do {
1655 if (m_current == '_') {
1656 if (UNLIKELY(!isASCIIOctalDigit(peek(1)) || isLegacyLiteral))
1657 return std::nullopt;
1658
1659 shift();
1660 }
1661
1662 octalValue = octalValue * 8 + (m_current - '0');
1663 digits[digit] = m_current;
1664 shift();
1665 --digit;
1666 } while (isASCIIOctalDigitOrSeparator(m_current) && digit >= 0);
1667
1668 if (LIKELY(!isASCIIDigitOrSeparator(m_current) && digit >= 0 && m_current != 'n'))
1669 return NumberParseResult { static_cast<double>(octalValue) };
1670
1671 for (int i = maximumDigits - 1; i > digit; --i)
1672 record8(digits[i]);
1673
1674 while (isASCIIOctalDigitOrSeparator(m_current)) {
1675 if (m_current == '_') {
1676 if (UNLIKELY(!isASCIIOctalDigit(peek(1)) || isLegacyLiteral))
1677 return std::nullopt;
1678
1679 shift();
1680 }
1681
1682 record8(m_current);
1683 shift();
1684 }
1685
1686 if (UNLIKELY(m_current == 'n') && !isLegacyLiteral)
1687 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1688
1689 if (isASCIIDigit(m_current))
1690 return std::nullopt;
1691
1692 return NumberParseResult { parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8) };
1693}
1694
1695template <typename T>
1696ALWAYS_INLINE auto Lexer<T>::parseDecimal() -> std::optional<NumberParseResult>
1697{
1698 ASSERT(isASCIIDigit(m_current) || m_buffer8.size());
1699 bool isLegacyLiteral = m_buffer8.size() && isASCIIDigitOrSeparator(m_current);
1700
1701 // Optimization: most decimal values fit into 4 bytes.
1702 uint32_t decimalValue = 0;
1703
1704 // Since parseOctal may be executed before parseDecimal,
1705 // the m_buffer8 may hold ascii digits.
1706 if (!m_buffer8.size()) {
1707 const unsigned maximumDigits = 10;
1708 int digit = maximumDigits - 1;
1709 // Temporary buffer for the digits. Makes easier
1710 // to reconstruct the input characters when needed.
1711 LChar digits[maximumDigits];
1712
1713 do {
1714 if (m_current == '_') {
1715 if (UNLIKELY(!isASCIIDigit(peek(1)) || isLegacyLiteral))
1716 return std::nullopt;
1717
1718 shift();
1719 }
1720
1721 decimalValue = decimalValue * 10 + (m_current - '0');
1722 digits[digit] = m_current;
1723 shift();
1724 --digit;
1725 } while (isASCIIDigitOrSeparator(m_current) && digit >= 0);
1726
1727 if (digit >= 0 && m_current != '.' && !isASCIIAlphaCaselessEqual(m_current, 'e') && m_current != 'n')
1728 return NumberParseResult { static_cast<double>(decimalValue) };
1729
1730 for (int i = maximumDigits - 1; i > digit; --i)
1731 record8(digits[i]);
1732 }
1733
1734 while (isASCIIDigitOrSeparator(m_current)) {
1735 if (m_current == '_') {
1736 if (UNLIKELY(!isASCIIDigit(peek(1)) || isLegacyLiteral))
1737 return std::nullopt;
1738
1739 shift();
1740 }
1741
1742 record8(m_current);
1743 shift();
1744 }
1745
1746 if (UNLIKELY(m_current == 'n' && !isLegacyLiteral))
1747 return NumberParseResult { makeIdentifier(m_buffer8.data(), m_buffer8.size()) };
1748
1749 return std::nullopt;
1750}
1751
1752template <typename T>
1753ALWAYS_INLINE bool Lexer<T>::parseNumberAfterDecimalPoint()
1754{
1755 ASSERT(isASCIIDigit(m_current));
1756 record8('.');
1757
1758 do {
1759 if (m_current == '_') {
1760 if (UNLIKELY(!isASCIIDigit(peek(1))))
1761 return false;
1762
1763 shift();
1764 }
1765
1766 record8(m_current);
1767 shift();
1768 } while (isASCIIDigitOrSeparator(m_current));
1769
1770 return true;
1771}
1772
1773template <typename T>
1774ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
1775{
1776 record8('e');
1777 shift();
1778 if (m_current == '+' || m_current == '-') {
1779 record8(m_current);
1780 shift();
1781 }
1782
1783 if (!isASCIIDigit(m_current))
1784 return false;
1785
1786 do {
1787 if (m_current == '_') {
1788 if (UNLIKELY(!isASCIIDigit(peek(1))))
1789 return false;
1790
1791 shift();
1792 }
1793
1794 record8(m_current);
1795 shift();
1796 } while (isASCIIDigitOrSeparator(m_current));
1797
1798 return true;
1799}
1800
1801template <typename T>
1802ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
1803{
1804 while (true) {
1805 while (UNLIKELY(m_current == '*')) {
1806 shift();
1807 if (m_current == '/') {
1808 shift();
1809 return true;
1810 }
1811 }
1812
1813 if (atEnd())
1814 return false;
1815
1816 if (isLineTerminator(m_current)) {
1817 shiftLineTerminator();
1818 m_hasLineTerminatorBeforeToken = true;
1819 } else
1820 shift();
1821 }
1822}
1823
1824template <typename T>
1825ALWAYS_INLINE void Lexer<T>::parseCommentDirective()
1826{
1827 // sourceURL and sourceMappingURL directives.
1828 if (!consume("source"))
1829 return;
1830
1831 if (consume("URL=")) {
1832 m_sourceURLDirective = parseCommentDirectiveValue();
1833 return;
1834 }
1835
1836 if (consume("MappingURL=")) {
1837 m_sourceMappingURLDirective = parseCommentDirectiveValue();
1838 return;
1839 }
1840}
1841
1842template <typename T>
1843ALWAYS_INLINE String Lexer<T>::parseCommentDirectiveValue()
1844{
1845 skipWhitespace();
1846 bool hasNonLatin1 = false;
1847 const T* stringStart = currentSourcePtr();
1848 while (!isWhiteSpace(m_current) && !isLineTerminator(m_current) && m_current != '"' && m_current != '\'' && !atEnd()) {
1849 if (!isLatin1(m_current))
1850 hasNonLatin1 = true;
1851 shift();
1852 }
1853 const T* stringEnd = currentSourcePtr();
1854 skipWhitespace();
1855
1856 if (!isLineTerminator(m_current) && !atEnd())
1857 return String();
1858
1859 unsigned length = stringEnd - stringStart;
1860 if (hasNonLatin1) {
1861 UChar* buffer = nullptr;
1862 String result = StringImpl::createUninitialized(length, buffer);
1863 StringImpl::copyCharacters(buffer, stringStart, length);
1864 return result;
1865 }
1866
1867 LChar* buffer = nullptr;
1868 String result = StringImpl::createUninitialized(length, buffer);
1869 StringImpl::copyCharacters(buffer, stringStart, length);
1870 return result;
1871}
1872
1873template <typename T>
1874template <unsigned length>
1875ALWAYS_INLINE bool Lexer<T>::consume(const char (&input)[length])
1876{
1877 unsigned lengthToCheck = length - 1; // Ignore the ending NULL byte in the string literal.
1878
1879 unsigned i = 0;
1880 for (; i < lengthToCheck && m_current == input[i]; i++)
1881 shift();
1882
1883 return i == lengthToCheck;
1884}
1885
1886template <typename T>
1887bool Lexer<T>::nextTokenIsColon()
1888{
1889 const T* code = m_code;
1890 while (code < m_codeEnd && (isWhiteSpace(*code) || isLineTerminator(*code)))
1891 code++;
1892
1893 return code < m_codeEnd && *code == ':';
1894}
1895
1896template <typename T>
1897void Lexer<T>::fillTokenInfo(JSToken* tokenRecord, JSTokenType token, int lineNumber, int endOffset, int lineStartOffset, JSTextPosition endPosition)
1898{
1899 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1900 tokenLocation->line = lineNumber;
1901 tokenLocation->endOffset = endOffset;
1902 tokenLocation->lineStartOffset = lineStartOffset;
1903 ASSERT(tokenLocation->endOffset >= tokenLocation->lineStartOffset);
1904 tokenRecord->m_endPosition = endPosition;
1905 m_lastToken = token;
1906}
1907
1908template <typename T>
1909JSTokenType Lexer<T>::lexWithoutClearingLineTerminator(JSToken* tokenRecord, OptionSet<LexerFlags> lexerFlags, bool strictMode)
1910{
1911 JSTokenData* tokenData = &tokenRecord->m_data;
1912 JSTokenLocation* tokenLocation = &tokenRecord->m_location;
1913 m_lastTokenLocation = JSTokenLocation(tokenRecord->m_location);
1914
1915 ASSERT(!m_error);
1916 ASSERT(m_buffer8.isEmpty());
1917 ASSERT(m_buffer16.isEmpty());
1918
1919 JSTokenType token = ERRORTOK;
1920
1921start:
1922 skipWhitespace();
1923
1924 tokenLocation->startOffset = currentOffset();
1925 ASSERT(currentOffset() >= currentLineStartOffset());
1926 tokenRecord->m_startPosition = currentPosition();
1927
1928 if (atEnd()) {
1929 token = EOFTOK;
1930 goto returnToken;
1931 }
1932
1933 CharacterType type;
1934 if (LIKELY(isLatin1(m_current)))
1935 type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
1936 else {
1937 UChar32 codePoint;
1938 U16_GET(m_code, 0, 0, m_codeEnd - m_code, codePoint);
1939 if (isNonLatin1IdentStart(codePoint))
1940 type = CharacterIdentifierStart;
1941 else if (isLineTerminator(m_current))
1942 type = CharacterLineTerminator;
1943 else
1944 type = CharacterInvalid;
1945 }
1946
1947 switch (type) {
1948 case CharacterGreater:
1949 shift();
1950 if (m_current == '>') {
1951 shift();
1952 if (m_current == '>') {
1953 shift();
1954 if (m_current == '=') {
1955 shift();
1956 token = URSHIFTEQUAL;
1957 break;
1958 }
1959 token = URSHIFT;
1960 break;
1961 }
1962 if (m_current == '=') {
1963 shift();
1964 token = RSHIFTEQUAL;
1965 break;
1966 }
1967 token = RSHIFT;
1968 break;
1969 }
1970 if (m_current == '=') {
1971 shift();
1972 token = GE;
1973 break;
1974 }
1975 token = GT;
1976 break;
1977 case CharacterEqual: {
1978 if (peek(1) == '>') {
1979 token = ARROWFUNCTION;
1980 tokenData->line = lineNumber();
1981 tokenData->offset = currentOffset();
1982 tokenData->lineStartOffset = currentLineStartOffset();
1983 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
1984 shift();
1985 shift();
1986 break;
1987 }
1988
1989 shift();
1990 if (m_current == '=') {
1991 shift();
1992 if (m_current == '=') {
1993 shift();
1994 token = STREQ;
1995 break;
1996 }
1997 token = EQEQ;
1998 break;
1999 }
2000 token = EQUAL;
2001 break;
2002 }
2003 case CharacterLess:
2004 shift();
2005 if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
2006 if (m_scriptMode == JSParserScriptMode::Classic) {
2007 // <!-- marks the beginning of a line comment (for www usage)
2008 goto inSingleLineComment;
2009 }
2010 }
2011 if (m_current == '<') {
2012 shift();
2013 if (m_current == '=') {
2014 shift();
2015 token = LSHIFTEQUAL;
2016 break;
2017 }
2018 token = LSHIFT;
2019 break;
2020 }
2021 if (m_current == '=') {
2022 shift();
2023 token = LE;
2024 break;
2025 }
2026 token = LT;
2027 break;
2028 case CharacterExclamationMark:
2029 shift();
2030 if (m_current == '=') {
2031 shift();
2032 if (m_current == '=') {
2033 shift();
2034 token = STRNEQ;
2035 break;
2036 }
2037 token = NE;
2038 break;
2039 }
2040 token = EXCLAMATION;
2041 break;
2042 case CharacterAdd:
2043 shift();
2044 if (m_current == '+') {
2045 shift();
2046 token = (!m_hasLineTerminatorBeforeToken) ? PLUSPLUS : AUTOPLUSPLUS;
2047 break;
2048 }
2049 if (m_current == '=') {
2050 shift();
2051 token = PLUSEQUAL;
2052 break;
2053 }
2054 token = PLUS;
2055 break;
2056 case CharacterSub:
2057 shift();
2058 if (m_current == '-') {
2059 shift();
2060 if ((m_atLineStart || m_hasLineTerminatorBeforeToken) && m_current == '>') {
2061 if (m_scriptMode == JSParserScriptMode::Classic) {
2062 shift();
2063 goto inSingleLineComment;
2064 }
2065 }
2066 token = (!m_hasLineTerminatorBeforeToken) ? MINUSMINUS : AUTOMINUSMINUS;
2067 break;
2068 }
2069 if (m_current == '=') {
2070 shift();
2071 token = MINUSEQUAL;
2072 break;
2073 }
2074 token = MINUS;
2075 break;
2076 case CharacterMultiply:
2077 shift();
2078 if (m_current == '=') {
2079 shift();
2080 token = MULTEQUAL;
2081 break;
2082 }
2083 if (m_current == '*') {
2084 shift();
2085 if (m_current == '=') {
2086 shift();
2087 token = POWEQUAL;
2088 break;
2089 }
2090 token = POW;
2091 break;
2092 }
2093 token = TIMES;
2094 break;
2095 case CharacterSlash:
2096 shift();
2097 if (m_current == '/') {
2098 shift();
2099 goto inSingleLineCommentCheckForDirectives;
2100 }
2101 if (m_current == '*') {
2102 shift();
2103 auto startLineNumber = m_lineNumber;
2104 auto startLineStartOffset = currentLineStartOffset();
2105 if (parseMultilineComment())
2106 goto start;
2107 m_lexErrorMessage = "Multiline comment was not closed properly"_s;
2108 token = UNTERMINATED_MULTILINE_COMMENT_ERRORTOK;
2109 m_error = true;
2110 fillTokenInfo(tokenRecord, token, startLineNumber, currentOffset(), startLineStartOffset, currentPosition());
2111 return token;
2112 }
2113 if (m_current == '=') {
2114 shift();
2115 token = DIVEQUAL;
2116 break;
2117 }
2118 token = DIVIDE;
2119 break;
2120 case CharacterAnd:
2121 shift();
2122 if (m_current == '&') {
2123 shift();
2124 if (m_current == '=') {
2125 shift();
2126 token = ANDEQUAL;
2127 break;
2128 }
2129 token = AND;
2130 break;
2131 }
2132 if (m_current == '=') {
2133 shift();
2134 token = BITANDEQUAL;
2135 break;
2136 }
2137 token = BITAND;
2138 break;
2139 case CharacterXor:
2140 shift();
2141 if (m_current == '=') {
2142 shift();
2143 token = BITXOREQUAL;
2144 break;
2145 }
2146 token = BITXOR;
2147 break;
2148 case CharacterModulo:
2149 shift();
2150 if (m_current == '=') {
2151 shift();
2152 token = MODEQUAL;
2153 break;
2154 }
2155 token = MOD;
2156 break;
2157 case CharacterOr:
2158 shift();
2159 if (m_current == '=') {
2160 shift();
2161 token = BITOREQUAL;
2162 break;
2163 }
2164 if (m_current == '|') {
2165 shift();
2166 if (m_current == '=') {
2167 shift();
2168 token = OREQUAL;
2169 break;
2170 }
2171 token = OR;
2172 break;
2173 }
2174 token = BITOR;
2175 break;
2176 case CharacterOpenParen:
2177 token = OPENPAREN;
2178 tokenData->line = lineNumber();
2179 tokenData->offset = currentOffset();
2180 tokenData->lineStartOffset = currentLineStartOffset();
2181 shift();
2182 break;
2183 case CharacterCloseParen:
2184 token = CLOSEPAREN;
2185 shift();
2186 break;
2187 case CharacterOpenBracket:
2188 token = OPENBRACKET;
2189 shift();
2190 break;
2191 case CharacterCloseBracket:
2192 token = CLOSEBRACKET;
2193 shift();
2194 break;
2195 case CharacterComma:
2196 token = COMMA;
2197 shift();
2198 break;
2199 case CharacterColon:
2200 token = COLON;
2201 shift();
2202 break;
2203 case CharacterQuestion:
2204 shift();
2205 if (m_current == '?') {
2206 shift();
2207 if (m_current == '=') {
2208 shift();
2209 token = COALESCEEQUAL;
2210 break;
2211 }
2212 token = COALESCE;
2213 break;
2214 }
2215 if (m_current == '.' && !isASCIIDigit(peek(1))) {
2216 shift();
2217 token = QUESTIONDOT;
2218 break;
2219 }
2220 token = QUESTION;
2221 break;
2222 case CharacterTilde:
2223 token = TILDE;
2224 shift();
2225 break;
2226 case CharacterSemicolon:
2227 shift();
2228 token = SEMICOLON;
2229 break;
2230 case CharacterBackQuote:
2231 shift();
2232 token = BACKQUOTE;
2233 break;
2234 case CharacterOpenBrace:
2235 tokenData->line = lineNumber();
2236 tokenData->offset = currentOffset();
2237 tokenData->lineStartOffset = currentLineStartOffset();
2238 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2239 shift();
2240 token = OPENBRACE;
2241 break;
2242 case CharacterCloseBrace:
2243 tokenData->line = lineNumber();
2244 tokenData->offset = currentOffset();
2245 tokenData->lineStartOffset = currentLineStartOffset();
2246 ASSERT(tokenData->offset >= tokenData->lineStartOffset);
2247 shift();
2248 token = CLOSEBRACE;
2249 break;
2250 case CharacterDot:
2251 shift();
2252 if (!isASCIIDigit(m_current)) {
2253 if (UNLIKELY((m_current == '.') && (peek(1) == '.'))) {
2254 shift();
2255 shift();
2256 token = DOTDOTDOT;
2257 break;
2258 }
2259 token = DOT;
2260 break;
2261 }
2262 if (UNLIKELY(!parseNumberAfterDecimalPoint())) {
2263 m_lexErrorMessage = "Non-number found after decimal point"_s;
2264 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2265 goto returnError;
2266 }
2267 token = DOUBLE;
2268 if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
2269 m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2270 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2271 goto returnError;
2272 }
2273 size_t parsedLength;
2274 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2275 if (token == INTEGER)
2276 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2277
2278 if (LIKELY(cannotBeIdentStart(m_current))) {
2279 m_buffer8.shrink(0);
2280 break;
2281 }
2282
2283 if (UNLIKELY(isIdentStart(currentCodePoint()))) {
2284 m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2285 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2286 goto returnError;
2287 }
2288 m_buffer8.shrink(0);
2289 break;
2290 case CharacterZero:
2291 shift();
2292 if (isASCIIAlphaCaselessEqual(m_current, 'x')) {
2293 if (UNLIKELY(!isASCIIHexDigit(peek(1)))) {
2294 m_lexErrorMessage = "No hexadecimal digits after '0x'"_s;
2295 token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2296 goto returnError;
2297 }
2298
2299 // Shift out the 'x' prefix.
2300 shift();
2301
2302 auto parseNumberResult = parseHex();
2303 if (!parseNumberResult)
2304 tokenData->doubleValue = 0;
2305 else if (std::holds_alternative<double>(*parseNumberResult))
2306 tokenData->doubleValue = std::get<double>(*parseNumberResult);
2307 else {
2308 token = BIGINT;
2309 shift();
2310 tokenData->bigIntString = std::get<const Identifier*>(*parseNumberResult);
2311 tokenData->radix = 16;
2312 }
2313
2314 if (LIKELY(cannotBeIdentStart(m_current))) {
2315 if (LIKELY(token != BIGINT))
2316 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2317 m_buffer8.shrink(0);
2318 break;
2319 }
2320
2321 if (UNLIKELY(isIdentStart(currentCodePoint()))) {
2322 m_lexErrorMessage = "No space between hexadecimal literal and identifier"_s;
2323 token = UNTERMINATED_HEX_NUMBER_ERRORTOK;
2324 goto returnError;
2325 }
2326 if (LIKELY(token != BIGINT))
2327 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2328 m_buffer8.shrink(0);
2329 break;
2330 }
2331 if (isASCIIAlphaCaselessEqual(m_current, 'b')) {
2332 if (UNLIKELY(!isASCIIBinaryDigit(peek(1)))) {
2333 m_lexErrorMessage = "No binary digits after '0b'"_s;
2334 token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2335 goto returnError;
2336 }
2337
2338 // Shift out the 'b' prefix.
2339 shift();
2340
2341 auto parseNumberResult = parseBinary();
2342 if (!parseNumberResult)
2343 tokenData->doubleValue = 0;
2344 else if (std::holds_alternative<double>(*parseNumberResult))
2345 tokenData->doubleValue = std::get<double>(*parseNumberResult);
2346 else {
2347 token = BIGINT;
2348 shift();
2349 tokenData->bigIntString = std::get<const Identifier*>(*parseNumberResult);
2350 tokenData->radix = 2;
2351 }
2352
2353 if (LIKELY(cannotBeIdentStart(m_current))) {
2354 if (LIKELY(token != BIGINT))
2355 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2356 m_buffer8.shrink(0);
2357 break;
2358 }
2359
2360 if (UNLIKELY(isIdentStart(currentCodePoint()))) {
2361 m_lexErrorMessage = "No space between binary literal and identifier"_s;
2362 token = UNTERMINATED_BINARY_NUMBER_ERRORTOK;
2363 goto returnError;
2364 }
2365 if (LIKELY(token != BIGINT))
2366 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2367 m_buffer8.shrink(0);
2368 break;
2369 }
2370
2371 if (isASCIIAlphaCaselessEqual(m_current, 'o')) {
2372 if (UNLIKELY(!isASCIIOctalDigit(peek(1)))) {
2373 m_lexErrorMessage = "No octal digits after '0o'"_s;
2374 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2375 goto returnError;
2376 }
2377
2378 // Shift out the 'o' prefix.
2379 shift();
2380
2381 auto parseNumberResult = parseOctal();
2382 if (!parseNumberResult)
2383 tokenData->doubleValue = 0;
2384 else if (std::holds_alternative<double>(*parseNumberResult))
2385 tokenData->doubleValue = std::get<double>(*parseNumberResult);
2386 else {
2387 token = BIGINT;
2388 shift();
2389 tokenData->bigIntString = std::get<const Identifier*>(*parseNumberResult);
2390 tokenData->radix = 8;
2391 }
2392
2393 if (LIKELY(cannotBeIdentStart(m_current))) {
2394 if (LIKELY(token != BIGINT))
2395 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2396 m_buffer8.shrink(0);
2397 break;
2398 }
2399
2400 if (UNLIKELY(isIdentStart(currentCodePoint()))) {
2401 m_lexErrorMessage = "No space between octal literal and identifier"_s;
2402 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2403 goto returnError;
2404 }
2405 if (LIKELY(token != BIGINT))
2406 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2407 m_buffer8.shrink(0);
2408 break;
2409 }
2410
2411 if (UNLIKELY(m_current == '_')) {
2412 m_lexErrorMessage = "Numeric literals may not begin with 0_"_s;
2413 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2414 goto returnError;
2415 }
2416
2417 record8('0');
2418 if (UNLIKELY(strictMode && isASCIIDigit(m_current))) {
2419 m_lexErrorMessage = "Decimal integer literals with a leading zero are forbidden in strict mode"_s;
2420 token = UNTERMINATED_OCTAL_NUMBER_ERRORTOK;
2421 goto returnError;
2422 }
2423 if (isASCIIOctalDigit(m_current)) {
2424 auto parseNumberResult = parseOctal();
2425 if (parseNumberResult && std::holds_alternative<double>(*parseNumberResult)) {
2426 tokenData->doubleValue = std::get<double>(*parseNumberResult);
2427 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2428 }
2429 }
2430 FALLTHROUGH;
2431 case CharacterNumber:
2432 if (LIKELY(token != INTEGER && token != DOUBLE)) {
2433 auto parseNumberResult = parseDecimal();
2434 if (parseNumberResult) {
2435 if (std::holds_alternative<double>(*parseNumberResult)) {
2436 tokenData->doubleValue = std::get<double>(*parseNumberResult);
2437 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2438 } else {
2439 token = BIGINT;
2440 shift();
2441 tokenData->bigIntString = std::get<const Identifier*>(*parseNumberResult);
2442 tokenData->radix = 10;
2443 }
2444 } else {
2445 token = INTEGER;
2446 if (m_current == '.') {
2447 shift();
2448 if (UNLIKELY(isASCIIDigit(m_current) && !parseNumberAfterDecimalPoint())) {
2449 m_lexErrorMessage = "Non-number found after decimal point"_s;
2450 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2451 goto returnError;
2452 }
2453 token = DOUBLE;
2454 }
2455 if (UNLIKELY(isASCIIAlphaCaselessEqual(m_current, 'e') && !parseNumberAfterExponentIndicator())) {
2456 m_lexErrorMessage = "Non-number found after exponent indicator"_s;
2457 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2458 goto returnError;
2459 }
2460 size_t parsedLength;
2461 tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
2462 if (token == INTEGER)
2463 token = tokenTypeForIntegerLikeToken(tokenData->doubleValue);
2464 }
2465 }
2466
2467 if (LIKELY(cannotBeIdentStart(m_current))) {
2468 m_buffer8.shrink(0);
2469 break;
2470 }
2471
2472 if (UNLIKELY(isIdentStart(currentCodePoint()))) {
2473 m_lexErrorMessage = "No identifiers allowed directly after numeric literal"_s;
2474 token = atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK : INVALID_NUMERIC_LITERAL_ERRORTOK;
2475 goto returnError;
2476 }
2477 m_buffer8.shrink(0);
2478 break;
2479 case CharacterQuote: {
2480 auto startLineNumber = m_lineNumber;
2481 auto startLineStartOffset = currentLineStartOffset();
2482 StringParseResult result = StringCannotBeParsed;
2483 if (lexerFlags.contains(LexerFlags::DontBuildStrings))
2484 result = parseString<false>(tokenData, strictMode);
2485 else
2486 result = parseString<true>(tokenData, strictMode);
2487
2488 if (UNLIKELY(result != StringParsedSuccessfully)) {
2489 token = result == StringUnterminated ? UNTERMINATED_STRING_LITERAL_ERRORTOK : INVALID_STRING_LITERAL_ERRORTOK;
2490 m_error = true;
2491 fillTokenInfo(tokenRecord, token, startLineNumber, currentOffset(), startLineStartOffset, currentPosition());
2492 return token;
2493 }
2494 shift();
2495 token = STRING;
2496 m_atLineStart = false;
2497 fillTokenInfo(tokenRecord, token, startLineNumber, currentOffset(), startLineStartOffset, currentPosition());
2498 return token;
2499 }
2500 case CharacterIdentifierStart: {
2501 if constexpr (ASSERT_ENABLED) {
2502 UChar32 codePoint;
2503 U16_GET(m_code, 0, 0, m_codeEnd - m_code, codePoint);
2504 ASSERT(isIdentStart(codePoint));
2505 }
2506 FALLTHROUGH;
2507 }
2508 case CharacterBackSlash:
2509 parseIdent:
2510 if (lexerFlags.contains(LexerFlags::DontBuildKeywords))
2511 token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
2512 else
2513 token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
2514 break;
2515 case CharacterLineTerminator:
2516 ASSERT(isLineTerminator(m_current));
2517 shiftLineTerminator();
2518 m_atLineStart = true;
2519 m_hasLineTerminatorBeforeToken = true;
2520 goto start;
2521 case CharacterHash: {
2522 // Hashbang is only permitted at the start of the source text.
2523 auto next = peek(1);
2524 if (next == '!' && !currentOffset()) {
2525 shift();
2526 shift();
2527 goto inSingleLineComment;
2528 }
2529 // Otherwise, it could be a valid PrivateName.
2530 if (isSingleCharacterIdentStart(next) || next == '\\') {
2531 lexerFlags.remove(LexerFlags::DontBuildKeywords);
2532 goto parseIdent;
2533 }
2534 goto invalidCharacter;
2535 }
2536 case CharacterPrivateIdentifierStart:
2537 if (m_parsingBuiltinFunction)
2538 goto parseIdent;
2539 goto invalidCharacter;
2540 case CharacterOtherIdentifierPart:
2541 case CharacterInvalid:
2542 goto invalidCharacter;
2543 default:
2544 RELEASE_ASSERT_NOT_REACHED();
2545 m_lexErrorMessage = "Internal Error"_s;
2546 token = ERRORTOK;
2547 goto returnError;
2548 }
2549
2550 m_atLineStart = false;
2551 goto returnToken;
2552
2553inSingleLineCommentCheckForDirectives:
2554 // Script comment directives like "//# sourceURL=test.js".
2555 if (UNLIKELY((m_current == '#' || m_current == '@') && isWhiteSpace(peek(1)))) {
2556 shift();
2557 shift();
2558 parseCommentDirective();
2559 }
2560 // Fall through to complete single line comment parsing.
2561
2562inSingleLineComment:
2563 {
2564 auto lineNumber = m_lineNumber;
2565 auto endOffset = currentOffset();
2566 auto lineStartOffset = currentLineStartOffset();
2567 auto endPosition = currentPosition();
2568
2569 while (!isLineTerminator(m_current)) {
2570 if (atEnd()) {
2571 token = EOFTOK;
2572 fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2573 return token;
2574 }
2575 shift();
2576 }
2577 shiftLineTerminator();
2578 m_atLineStart = true;
2579 m_hasLineTerminatorBeforeToken = true;
2580 if (!lastTokenWasRestrKeyword())
2581 goto start;
2582
2583 token = SEMICOLON;
2584 fillTokenInfo(tokenRecord, token, lineNumber, endOffset, lineStartOffset, endPosition);
2585 return token;
2586 }
2587
2588returnToken:
2589 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2590 return token;
2591
2592invalidCharacter:
2593 m_lexErrorMessage = invalidCharacterMessage();
2594 token = ERRORTOK;
2595 // Falls through to return error.
2596
2597returnError:
2598 m_error = true;
2599 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2600 RELEASE_ASSERT(token & CanBeErrorTokenFlag);
2601 return token;
2602}
2603
2604template <typename T>
2605static inline void orCharacter(UChar&, UChar);
2606
2607template <>
2608inline void orCharacter<LChar>(UChar&, UChar) { }
2609
2610template <>
2611inline void orCharacter<UChar>(UChar& orAccumulator, UChar character)
2612{
2613 orAccumulator |= character;
2614}
2615
2616template <typename T>
2617JSTokenType Lexer<T>::scanRegExp(JSToken* tokenRecord, UChar patternPrefix)
2618{
2619 JSTokenData* tokenData = &tokenRecord->m_data;
2620 ASSERT(m_buffer16.isEmpty());
2621
2622 bool lastWasEscape = false;
2623 bool inBrackets = false;
2624 UChar charactersOredTogether = 0;
2625
2626 if (patternPrefix) {
2627 ASSERT(!isLineTerminator(patternPrefix));
2628 ASSERT(patternPrefix != '/');
2629 ASSERT(patternPrefix != '[');
2630 record16(patternPrefix);
2631 }
2632
2633 while (true) {
2634 if (isLineTerminator(m_current) || atEnd()) {
2635 m_buffer16.shrink(0);
2636 JSTokenType token = UNTERMINATED_REGEXP_LITERAL_ERRORTOK;
2637 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2638 m_error = true;
2639 m_lexErrorMessage = makeString("Unterminated regular expression literal '", getToken(*tokenRecord), "'");
2640 return token;
2641 }
2642
2643 T prev = m_current;
2644
2645 shift();
2646
2647 if (prev == '/' && !lastWasEscape && !inBrackets)
2648 break;
2649
2650 record16(prev);
2651 orCharacter<T>(charactersOredTogether, prev);
2652
2653 if (lastWasEscape) {
2654 lastWasEscape = false;
2655 continue;
2656 }
2657
2658 switch (prev) {
2659 case '[':
2660 inBrackets = true;
2661 break;
2662 case ']':
2663 inBrackets = false;
2664 break;
2665 case '\\':
2666 lastWasEscape = true;
2667 break;
2668 }
2669 }
2670
2671 tokenData->pattern = makeRightSizedIdentifier(m_buffer16.data(), m_buffer16.size(), charactersOredTogether);
2672 m_buffer16.shrink(0);
2673
2674 ASSERT(m_buffer8.isEmpty());
2675 while (LIKELY(isLatin1(m_current)) && isIdentPart(static_cast<LChar>(m_current))) {
2676 record8(static_cast<LChar>(m_current));
2677 shift();
2678 }
2679
2680 // Normally this would not be a lex error but dealing with surrogate pairs here is annoying and it's going to be an error anyway...
2681 if (UNLIKELY(!isLatin1(m_current) && !isWhiteSpace(m_current) && !isLineTerminator(m_current))) {
2682 m_buffer8.shrink(0);
2683 JSTokenType token = INVALID_IDENTIFIER_UNICODE_ERRORTOK;
2684 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2685 m_error = true;
2686 String codePoint = String::fromCodePoint(currentCodePoint());
2687 if (!codePoint)
2688 codePoint = "`invalid unicode character`"_s;
2689 m_lexErrorMessage = makeString("Invalid non-latin character in RexExp literal's flags '", getToken(*tokenRecord), codePoint, "'");
2690 return token;
2691 }
2692
2693 tokenData->flags = makeIdentifier(m_buffer8.data(), m_buffer8.size());
2694 m_buffer8.shrink(0);
2695
2696 // Since RegExp always ends with / or flags (IdentifierPart), m_atLineStart always becomes false.
2697 m_atLineStart = false;
2698
2699 JSTokenType token = REGEXP;
2700 fillTokenInfo(tokenRecord, token, m_lineNumber, currentOffset(), currentLineStartOffset(), currentPosition());
2701 return token;
2702}
2703
2704template <typename T>
2705JSTokenType Lexer<T>::scanTemplateString(JSToken* tokenRecord, RawStringsBuildMode rawStringsBuildMode)
2706{
2707 JSTokenData* tokenData = &tokenRecord->m_data;
2708 ASSERT(!m_error);
2709 ASSERT(m_buffer16.isEmpty());
2710
2711 int startingLineStartOffset = currentLineStartOffset();
2712 int startingLineNumber = lineNumber();
2713
2714 // Leading backquote ` (for template head) or closing brace } (for template trailing) are already shifted in the previous token scan.
2715 // So in this re-scan phase, shift() is not needed here.
2716 StringParseResult result = parseTemplateLiteral(tokenData, rawStringsBuildMode);
2717 JSTokenType token = ERRORTOK;
2718 if (UNLIKELY(result != StringParsedSuccessfully)) {
2719 token = result == StringUnterminated ? UNTERMINATED_TEMPLATE_LITERAL_ERRORTOK : INVALID_TEMPLATE_LITERAL_ERRORTOK;
2720 m_error = true;
2721 } else
2722 token = TEMPLATE;
2723
2724 // Since TemplateString always ends with ` or }, m_atLineStart always becomes false.
2725 m_atLineStart = false;
2726 fillTokenInfo(tokenRecord, token, startingLineNumber, currentOffset(), startingLineStartOffset, currentPosition());
2727 return token;
2728}
2729
2730template <typename T>
2731void Lexer<T>::clear()
2732{
2733 m_arena = nullptr;
2734
2735 Vector<LChar> newBuffer8;
2736 m_buffer8.swap(newBuffer8);
2737
2738 Vector<UChar> newBuffer16;
2739 m_buffer16.swap(newBuffer16);
2740
2741 Vector<UChar> newBufferForRawTemplateString16;
2742 m_bufferForRawTemplateString16.swap(newBufferForRawTemplateString16);
2743
2744 m_isReparsingFunction = false;
2745}
2746
2747// Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h
2748template class Lexer<LChar>;
2749template class Lexer<UChar>;
2750
2751} // namespace JSC
Note: See TracBrowser for help on using the repository browser.