source: webkit/trunk/JavaScriptCore/parser/Lexer.h@ 43358

Last change on this file since 43358 was 43358, checked in by Darin Adler, 16 years ago

2009-05-07 Darin Adler <Darin Adler>

Reviewed by Sam Weinig.

Bug 25589: goto instead of state machine in lexer
https://bugs.webkit.org/show_bug.cgi?id=25589

SunSpider is 0.8% faster.

  • parser/Lexer.cpp: (JSC::Lexer::currentCharacter): Added. (JSC::Lexer::currentOffset): Changed to call currentCharacter for clarity. (JSC::Lexer::setCode): Removed code to set now-obsolete m_skipLineEnd. (JSC::Lexer::shiftLineTerminator): Added. Handles line numbers and the two-character line terminators. (JSC::Lexer::makeIdentifier): Changed to take characters and length rather than a vector, since we now make these directly out of the source buffer when possible. (JSC::Lexer::lastTokenWasRestrKeyword): Added. (JSC::isNonASCIIIdentStart): Broke out the non-inline part. (JSC::isIdentStart): Moved here. (JSC::isNonASCIIIdentPart): Broke out the non-inline part. (JSC::isIdentPart): Moved here. (JSC::singleEscape): Moved here, and removed some unneeded cases. (JSC::Lexer::record8): Moved here. (JSC::Lexer::record16): Moved here. (JSC::Lexer::lex): Rewrote this whole function to use goto and not use a state machine. Got rid of most of the local variables. Also rolled the matchPunctuator function in here. (JSC::Lexer::scanRegExp): Changed to use the new version of isLineTerminator. Clear m_buffer16 after using it instead of before.
  • parser/Lexer.h: Removed State enum, setDone function, nextLine function, lookupKeywordFunction, one of the isLineTerminator functions, m_done data member, m_skipLineEnd data member, and m_state data member. Added shiftLineTerminator function, currentCharacter function, and changed the arguments to the makeIdentifier function. Removed one branch from the isLineTerminator function.
  • runtime/StringPrototype.cpp: (JSC::stringProtoFuncReplace): Streamlined the case where we don't replace anything.
  • Property svn:eol-style set to native
File size: 4.3 KB
Line 
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#ifndef Lexer_h
23#define Lexer_h
24
25#include "Lookup.h"
26#include "SegmentedVector.h"
27#include "SourceCode.h"
28#include <wtf/ASCIICType.h>
29#include <wtf/Vector.h>
30#include <wtf/unicode/Unicode.h>
31
32namespace JSC {
33
34 class RegExp;
35
36 class Lexer : Noncopyable { // Lexer driven by the parser through lex(); the constructor is private, so only the JSGlobalData friend can create one.
37 public:
38 // Character manipulation functions.
39 static bool isWhiteSpace(int character); // Defined inline at the end of this header.
40 static bool isLineTerminator(int character); // Defined inline at the end of this header.
41 static unsigned char convertHex(int c1, int c2); // Combines two hex-digit characters into one byte (c1 is the high nibble).
42 static UChar convertUnicode(int c1, int c2, int c3, int c4); // Combines four hex-digit characters into one UChar (c1 is the most significant digit).
43
44 // Functions to set up parsing.
45 void setCode(const SourceCode&);
46 void setIsReparsing() { m_isReparsing = true; }
47
48 // Functions for the parser itself.
49 int lex(void* lvalp, void* llocp); // NOTE(review): lvalp/llocp look like the parser's semantic-value and location out-parameters -- confirm in Lexer.cpp.
50 int lineNumber() const { return m_lineNumber; }
51 bool prevTerminator() const { return m_terminator; }
52 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
53 bool scanRegExp(); // NOTE(review): presumably fills in m_pattern and m_flags, which pattern() and flags() expose -- confirm in Lexer.cpp.
54 const UString& pattern() const { return m_pattern; }
55 const UString& flags() const { return m_flags; }
56
57 // Functions for use after parsing.
58 bool sawError() const { return m_error; }
59 void clear();
60
61 private:
62 friend class JSGlobalData;
63
64 Lexer(JSGlobalData*);
65 ~Lexer();
66
67 void shift1(); // NOTE(review): shift1-shift4 presumably advance the m_current/m_next1..m_next3 lookahead by 1-4 characters -- confirm in Lexer.cpp.
68 void shift2();
69 void shift3();
70 void shift4();
71 void shiftLineTerminator(); // Per the ChangeLog for this revision: handles line numbers and the two-character line terminators.
72
73 void record8(int); // NOTE(review): presumably appends to m_buffer8 -- confirm in Lexer.cpp.
74 void record16(int); // NOTE(review): record16 overloads presumably append to m_buffer16 -- confirm in Lexer.cpp.
75 void record16(UChar);
76
77 void copyCodeWithoutBOMs();
78
79 int currentOffset() const;
80 const UChar* currentCharacter() const;
81
82 JSC::Identifier* makeIdentifier(const UChar* buffer, size_t length); // Takes characters and a length (not a vector) so identifiers can be made directly out of the source buffer when possible.
83
84 bool lastTokenWasRestrKeyword() const;
85
86 static const size_t initialReadBufferCapacity = 32; // NOTE(review): presumably the initial capacity reserved for m_buffer8/m_buffer16 -- confirm in Lexer.cpp.
87 static const size_t initialIdentifierTableCapacity = 64; // Used as the second template argument of m_identifiers below.
88
89 int m_lineNumber;
90
91 Vector<char> m_buffer8;
92 Vector<UChar> m_buffer16;
93 bool m_terminator; // Value returned by prevTerminator().
94 bool m_delimited; // encountered delimiter like "'" and "}" on last run
95 int m_lastToken;
96
97 const SourceCode* m_source;
98 const UChar* m_code;
99 const UChar* m_codeStart;
100 const UChar* m_codeEnd;
101 bool m_isReparsing; // Set to true by setIsReparsing().
102 bool m_atLineStart;
103 bool m_error; // Value returned by sawError().
104
105 // current and following unicode characters (int to allow for -1 for end-of-file marker)
106 int m_current;
107 int m_next1;
108 int m_next2;
109 int m_next3;
110
111 SegmentedVector<JSC::Identifier, initialIdentifierTableCapacity> m_identifiers;
112
113 JSGlobalData* m_globalData;
114
115 UString m_pattern; // Exposed read-only through pattern().
116 UString m_flags; // Exposed read-only through flags().
117
118 const HashTable m_keywordTable;
119
120 Vector<UChar> m_codeWithoutBOMs; // NOTE(review): presumably filled by copyCodeWithoutBOMs() -- confirm in Lexer.cpp.
121 };
122
123 inline bool Lexer::isWhiteSpace(int ch) // ASCII input: true only for space, tab, 0xB and 0xC. Non-ASCII input: whatever WTF::Unicode::isSeparatorSpace reports.
124 {
125 return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch);
126 }
127
128 inline bool Lexer::isLineTerminator(int ch) // True for '\r', '\n', 0x2028 and 0x2029.
129 {
130 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028; // Clearing the low bit lets one comparison match both 0x2028 and 0x2029.
131 }
132
133 inline unsigned char Lexer::convertHex(int c1, int c2) // Builds one byte from two characters: c1 gives the high nibble, c2 the low nibble.
134 {
135 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2); // No validation here; NOTE(review): assumes callers pass hex-digit characters -- confirm at call sites.
136 }
137
138 inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) // Builds one UChar from four characters via convertHex.
139 {
140 return (convertHex(c1, c2) << 8) | convertHex(c3, c4); // c1,c2 form the high byte; c3,c4 form the low byte.
141 }
142
143} // namespace JSC
144
145#endif // Lexer_h
Note: See TracBrowser for help on using the repository browser.