source: webkit/trunk/JavaScriptCore/kjs/lexer.h@ 27747

Last change on this file since 27747 was 17862, checked in by ap, 19 years ago

2006-11-20 W. Andy Carrel <[email protected]>

Reviewed by Maciej.

http://bugs.webkit.org/show_bug.cgi?id=11501
REGRESSION: \u no longer escapes metacharacters in RegExps
http://bugs.webkit.org/show_bug.cgi?id=11502
Serializing RegExps doesn't preserve Unicode escapes

JavaScriptCore:

  • kjs/lexer.cpp: (Lexer::Lexer): (Lexer::setCode): (Lexer::shift): (Lexer::scanRegExp): Push \u parsing back down into the RegExp object rather than in the parser. This backs out r17354 in favor of a new fix that better matches the behavior of other browsers.
  • kjs/lexer.h:
  • kjs/regexp.cpp: (KJS::RegExp::RegExp): (KJS::sanitizePattern): (KJS::isHexDigit): (KJS::convertHex): (KJS::convertUnicode):
  • kjs/regexp.h: Translate \u escaped unicode characters for the benefit of pcre.
  • kjs/ustring.cpp: (KJS::UString::append): Fix failure to increment length on the first UChar appended to a UString that was copy-on-write.
  • tests/mozilla/ecma_2/RegExp/properties-001.js: Adjust tests back to the uniform standards.

LayoutTests:

  • fast/js/kde/RegExp-expected.txt:
  • fast/js/regexp-unicode-handling-expected.txt: Adjust these test results to passing as a result of other included changes in this revision.
  • Property svn:eol-style set to native
File size: 4.2 KB
Line 
1// -*- c-basic-offset: 2 -*-
2/*
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten ([email protected])
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef _KJSLEXER_H_
24#define _KJSLEXER_H_
25
26#include "ustring.h"
27
28
// Declarations for the KJS script lexer. Only the interface is visible here;
// all out-of-line member functions are defined elsewhere (kjs/lexer.cpp).
29namespace KJS {
30
// Forward declaration: this header only uses Identifier through pointers
// (see makeIdentifier() and the identifiers array in Lexer).
31 class Identifier;
32
// Forward declaration; no other declaration visible in this header refers to it.
33 class RegExp;
34
// Lexer holds the scanner state for one piece of source text.
// Input is supplied with setCode() and consumed through lex(), which returns
// an int per call (presumably a token code - confirm against lexer.cpp).
35 class Lexer {
36 public:
37 Lexer();
38 ~Lexer();
// Static accessor returning a Lexer pointer.
// NOTE(review): presumably the shared/current instance; the definition is not
// shown here, so ownership and lifetime must be checked in lexer.cpp.
39 static Lexer *curr();
40
// Hands the lexer a new input.
//   sourceURL          - stored/reported via sourceURL() (presumably; confirm)
//   startingLineNumber - presumably the initial value for lineNo(); confirm
//   c, len             - pointer to UChar data and an unsigned count;
//                        assumed to be the source buffer and its length in
//                        UChars - TODO confirm, including who owns the buffer
41 void setCode(const UString &sourceURL, int startingLineNumber, const UChar *c, unsigned int len);
42 int lex();
43
// Returns the current value of yylineno.
44 int lineNo() const { return yylineno; }
// Returns a copy of m_sourceURL (returned by value, not by reference).
45 UString sourceURL() const { return m_sourceURL; }
46
// Returns the private 'terminator' flag.
// NOTE(review): by its name this records whether a line terminator preceded
// the current token; confirm where it is set in lexer.cpp.
47 bool prevTerminator() const { return terminator; }
48
// Scanner states; the current one is kept in the private member 'state'.
// Only the names are declared here - the transitions live in the implementation.
// Because this is an unscoped enum, its enumerators (e.g. Identifier, String,
// Number) are names in Lexer's scope and hide same-named types from the
// enclosing namespace inside this class.
49 enum State { Start,
50 IdentifierOrKeyword,
51 Identifier,
52 InIdentifierOrKeyword,
53 InIdentifier,
54 InIdentifierUnicodeEscapeStart,
55 InIdentifierUnicodeEscape,
56 InSingleLineComment,
57 InMultiLineComment,
58 InNum,
59 InNum0,
60 InHex,
61 InOctal,
62 InDecimal,
63 InExponentIndicator,
64 InExponent,
65 Hex,
66 Octal,
67 Number,
68 String,
69 Eof,
70 InString,
71 InEscapeSequence,
72 InHexEscape,
73 InUnicodeEscape,
74 Other,
75 Bad };
76
// Scans a regular-expression literal; the bool result presumably signals
// success (confirm in lexer.cpp).
// NOTE(review): 'pattern' and 'flags' are public and declared right next to
// scanRegExp(), so they are presumably its outputs - verify against callers.
77 bool scanRegExp();
78 UString pattern, flags;
79
80 private:
81 int yylineno;
82 UString m_sourceURL;
83 bool done;
// Two character buffers, one of char and one of UChar, each with a size and a
// position counter.
// NOTE(review): presumably filled by record8()/record16() while a token is
// being accumulated; allocation and growth are not visible in this header.
84 char *buffer8;
85 UChar *buffer16;
86 unsigned int size8, size16;
87 unsigned int pos8, pos16;
88 bool terminator;
89 bool restrKeyword;
90 // encountered delimiter like "'" and "}" on last run
91 bool delimited;
92 bool skipLF;
93 bool skipCR;
94 bool eatNextIdentifier;
95 int stackToken;
96 int lastToken;
97
98 State state;
99 void setDone(State);
100 unsigned int pos;
101 void shift(unsigned int p);
102 void nextLine();
103 int lookupKeyword(const char *);
104
// Note the asymmetry: isWhiteSpace() is const, isLineTerminator() is not.
105 bool isWhiteSpace() const;
106 bool isLineTerminator();
107 static bool isOctalDigit(int);
108
109 int matchPunctuator(int c1, int c2, int c3, int c4);
110 static unsigned short singleEscape(unsigned short);
111 static unsigned short convertOctal(int c1, int c2, int c3);
// The helpers below are public and static, so they can be called without a
// Lexer instance. Characters are passed as int.
112 public:
113 static unsigned char convertHex(int);
114 static unsigned char convertHex(int c1, int c2);
115 static UChar convertUnicode(int c1, int c2, int c3, int c4);
116 static bool isIdentStart(int);
117 static bool isIdentPart(int);
118 static bool isHexDigit(int);
119
// Declared only when KJS_DEBUG_MEM is defined.
120#ifdef KJS_DEBUG_MEM
121 /**
122 * Clear statically allocated resources
123 */
124 static void globalClear();
125#endif
126
// Returns the private 'error' flag.
127 bool sawError() const { return error; }
128 void doneParsing();
129
130 private:
131
// record16 is overloaded on int and UChar; record8 takes int only.
132 void record8(int);
133 void record16(int);
134 void record16(UChar);
135
// The return type is spelled KJS::Identifier because the State enumerator
// 'Identifier' hides the class name inside Lexer.
// NOTE(review): both helpers return raw pointers; see the strings/identifiers
// arrays below for what appears to be the matching bookkeeping.
136 KJS::Identifier *makeIdentifier(UChar *buffer, unsigned int pos);
137 UString *makeUString(UChar *buffer, unsigned int pos);
138
139 const UChar *code;
140 unsigned int length;
141 int yycolumn;
// 'bol' exists only when KJS_PURE_ECMA is NOT defined, so the object layout
// differs between the two configurations.
142#ifndef KJS_PURE_ECMA
143 int bol; // begin of line
144#endif
145 bool error;
146
147 // current and following unicode characters (int to allow for -1 for end-of-file marker)
148 int current, next1, next2, next3;
149
// Array of UString pointers with a count and a capacity.
// NOTE(review): presumably tracks strings handed out by makeUString() so they
// can be released later (e.g. in doneParsing()) - confirm in lexer.cpp.
150 UString **strings;
151 unsigned int numStrings;
152 unsigned int stringsCapacity;
153
// Same count/capacity arrangement for KJS::Identifier pointers (qualified for
// the same name-hiding reason as makeIdentifier above).
154 KJS::Identifier **identifiers;
155 unsigned int numIdentifiers;
156 unsigned int identifiersCapacity;
157
158 // for future extensions
// LexerPrivate is only declared here, never defined in this header.
159 class LexerPrivate;
160 LexerPrivate *priv;
161 };
162
163} // namespace
164
165#endif
Note: See TracBrowser for help on using the repository browser.