Changeset 43156 in webkit for trunk/JavaScriptCore/parser/Lexer.cpp
- Timestamp: May 3, 2009, 9:49:35 AM (16 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/JavaScriptCore/parser/Lexer.cpp
r43144 r43156 32 32 #include <limits.h> 33 33 #include <string.h> 34 #include <wtf/ASCIICType.h>35 34 #include <wtf/Assertions.h> 36 35 … … 38 37 using namespace Unicode; 39 38 40 // we can't specify the namespace in yacc's C output, so do it here39 // We can't specify the namespace in yacc's C output, so do it here instead. 41 40 using namespace JSC; 42 41 … … 48 47 #include "Lexer.lut.h" 49 48 50 // a bridge for yacc from the C world to C++49 // A bridge for yacc from the C world to the C++ world. 51 50 int jscyylex(void* lvalp, void* llocp, void* globalData) 52 51 { … … 56 55 namespace JSC { 57 56 58 static bool isDecimalDigit(int); 57 static const UChar byteOrderMark = 0xFEFF; 58 59 // Values for m_skipLineEnd. 60 static const unsigned char SkipLFShift = 0; 61 static const unsigned char SkipCRShift = 1; 62 static const unsigned char SkipLF = 1 << SkipLFShift; 63 static const unsigned char SkipCR = 1 << SkipCRShift; 59 64 60 65 Lexer::Lexer(JSGlobalData* globalData) 61 : yylineno(1) 62 , m_restrKeyword(false) 63 , m_eatNextIdentifier(false) 64 , m_stackToken(-1) 65 , m_lastToken(-1) 66 , m_position(0) 67 , m_code(0) 68 , m_length(0) 69 , m_isReparsing(false) 70 , m_atLineStart(true) 71 , m_current(0) 72 , m_next1(0) 73 , m_next2(0) 74 , m_next3(0) 75 , m_currentOffset(0) 76 , m_nextOffset1(0) 77 , m_nextOffset2(0) 78 , m_nextOffset3(0) 66 : m_isReparsing(false) 79 67 , m_globalData(globalData) 80 , m_ mainTable(JSC::mainTable)68 , m_keywordTable(JSC::mainTable) 81 69 { 82 70 m_buffer8.reserveInitialCapacity(initialReadBufferCapacity); … … 86 74 Lexer::~Lexer() 87 75 { 88 m_mainTable.deleteTable(); 89 } 90 91 ALWAYS_INLINE void Lexer::shift(unsigned p) 92 { 93 // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM, 94 // see <https://bugs.webkit.org/show_bug.cgi?id=4931>. 
95 96 while (p--) { 97 m_current = m_next1; 98 m_next1 = m_next2; 99 m_next2 = m_next3; 100 m_currentOffset = m_nextOffset1; 101 m_nextOffset1 = m_nextOffset2; 102 m_nextOffset2 = m_nextOffset3; 103 do { 104 if (m_position >= m_length) { 105 m_nextOffset3 = m_position; 106 m_position++; 107 m_next3 = -1; 108 break; 109 } 110 m_nextOffset3 = m_position; 111 m_next3 = m_code[m_position++]; 112 } while (UNLIKELY(m_next3 == 0xFEFF)); 113 } 76 m_keywordTable.deleteTable(); 77 } 78 79 inline int Lexer::currentOffset() const 80 { 81 return m_code - 4 - m_codeStart; 82 } 83 84 ALWAYS_INLINE void Lexer::shift1() 85 { 86 m_current = m_next1; 87 m_next1 = m_next2; 88 m_next2 = m_next3; 89 if (LIKELY(m_code < m_codeEnd)) 90 m_next3 = m_code[0]; 91 else 92 m_next3 = -1; 93 94 ++m_code; 95 } 96 97 ALWAYS_INLINE void Lexer::shift2() 98 { 99 m_current = m_next2; 100 m_next1 = m_next3; 101 if (LIKELY(m_code + 1 < m_codeEnd)) { 102 m_next2 = m_code[0]; 103 m_next3 = m_code[1]; 104 } else { 105 m_next2 = m_code < m_codeEnd ? m_code[0] : -1; 106 m_next3 = -1; 107 } 108 109 m_code += 2; 110 } 111 112 ALWAYS_INLINE void Lexer::shift3() 113 { 114 m_current = m_next3; 115 if (LIKELY(m_code + 2 < m_codeEnd)) { 116 m_next1 = m_code[0]; 117 m_next2 = m_code[1]; 118 m_next3 = m_code[2]; 119 } else { 120 m_next1 = m_code < m_codeEnd ? m_code[0] : -1; 121 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1; 122 m_next3 = -1; 123 } 124 125 m_code += 3; 126 } 127 128 ALWAYS_INLINE void Lexer::shift4() 129 { 130 if (LIKELY(m_code + 3 < m_codeEnd)) { 131 m_current = m_code[0]; 132 m_next1 = m_code[1]; 133 m_next2 = m_code[2]; 134 m_next3 = m_code[3]; 135 } else { 136 m_current = m_code < m_codeEnd ? m_code[0] : -1; 137 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1; 138 m_next2 = m_code + 2 < m_codeEnd ? 
m_code[2] : -1; 139 m_next3 = -1; 140 } 141 142 m_code += 4; 114 143 } 115 144 116 145 void Lexer::setCode(const SourceCode& source) 117 146 { 118 yylineno = source.firstLine(); 119 m_restrKeyword = false; 147 m_lineNumber = source.firstLine(); 120 148 m_delimited = false; 121 m_eatNextIdentifier = false;122 m_stackToken = -1;123 149 m_lastToken = -1; 124 150 125 m_position = source.startOffset(); 151 const UChar* data = source.provider()->data(); 152 126 153 m_source = &source; 127 m_code = source.provider()->data();128 m_ length = source.endOffset();129 m_ skipLF = false;130 m_skip CR = false;154 m_codeStart = data; 155 m_code = data + source.startOffset(); 156 m_codeEnd = data + source.endOffset(); 157 m_skipLineEnd = 0; 131 158 m_error = false; 132 159 m_atLineStart = true; 133 160 134 // read first characters 135 shift(4); 161 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters. 162 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details. 163 if (source.provider()->hasBOMs()) { 164 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) { 165 if (UNLIKELY(*p == byteOrderMark)) { 166 copyCodeWithoutBOMs(); 167 break; 168 } 169 } 170 } 171 172 // Read the first characters into the 4-character buffer. 173 shift4(); 174 ASSERT(currentOffset() == source.startOffset()); 175 } 176 177 void Lexer::copyCodeWithoutBOMs() 178 { 179 // Note: In this case, the character offset data for debugging will be incorrect. 180 // If it's important to correctly debug code with extraneous BOMs, then the caller 181 // should strip the BOMs when creating the SourceProvider object and do its own 182 // mapping of offsets within the stripped text to original text offset. 
183 184 m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code); 185 for (const UChar* p = m_code; p < m_codeEnd; ++p) { 186 UChar c = *p; 187 if (c != byteOrderMark) 188 m_codeWithoutBOMs.append(c); 189 } 190 ptrdiff_t startDelta = m_codeStart - m_code; 191 m_code = m_codeWithoutBOMs.data(); 192 m_codeStart = m_code + startDelta; 193 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size(); 136 194 } 137 195 … … 139 197 void Lexer::nextLine() 140 198 { 141 yylineno++;199 ++m_lineNumber; 142 200 m_atLineStart = true; 143 201 } … … 149 207 } 150 208 151 int Lexer::lex(void* p1, void* p2) 152 { 153 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); 154 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); 155 int token = 0; 156 m_state = Start; 157 unsigned short stringType = 0; // either single or double quotes 158 m_buffer8.resize(0); 159 m_buffer16.resize(0); 160 m_done = false; 161 m_terminator = false; 162 m_skipLF = false; 163 m_skipCR = false; 164 165 // did we push a token on the stack previously ? 
166 // (after an automatic semicolon insertion) 167 if (m_stackToken >= 0) { 168 setDone(Other); 169 token = m_stackToken; 170 m_stackToken = 0; 171 } 172 int startOffset = m_currentOffset; 173 if (!m_done) { 174 while (true) { 175 if (m_skipLF && m_current != '\n') // found \r but not \n afterwards 176 m_skipLF = false; 177 if (m_skipCR && m_current != '\r') // found \n but not \r afterwards 178 m_skipCR = false; 179 if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one 180 m_skipLF = false; 181 m_skipCR = false; 182 shift(1); 183 } 184 switch (m_state) { 185 case Start: 186 startOffset = m_currentOffset; 187 if (isWhiteSpace()) { 188 // do nothing 189 } else if (m_current == '/' && m_next1 == '/') { 190 shift(1); 191 m_state = InSingleLineComment; 192 } else if (m_current == '/' && m_next1 == '*') { 193 shift(1); 194 m_state = InMultiLineComment; 195 } else if (m_current == -1) { 196 if (!m_terminator && !m_delimited && !m_isReparsing) { 197 // automatic semicolon insertion if program incomplete 198 token = ';'; 199 m_stackToken = 0; 200 setDone(Other); 201 } else 202 setDone(Eof); 203 } else if (isLineTerminator()) { 204 nextLine(); 205 m_terminator = true; 206 if (m_restrKeyword) { 207 token = ';'; 208 setDone(Other); 209 } 210 } else if (m_current == '"' || m_current == '\'') { 211 m_state = InString; 212 stringType = static_cast<unsigned short>(m_current); 213 } else if (isIdentStart(m_current)) { 214 record16(m_current); 215 m_state = InIdentifierOrKeyword; 216 } else if (m_current == '\\') 217 m_state = InIdentifierStartUnicodeEscapeStart; 218 else if (m_current == '0') { 219 record8(m_current); 220 m_state = InNum0; 221 } else if (isDecimalDigit(m_current)) { 222 record8(m_current); 223 m_state = InNum; 224 } else if (m_current == '.' && isDecimalDigit(m_next1)) { 225 record8(m_current); 226 m_state = InDecimal; 227 // <!-- marks the beginning of a line comment (for www usage) 228 } else if (m_current == '<' && m_next1 == '!' 
&& m_next2 == '-' && m_next3 == '-') { 229 shift(3); 230 m_state = InSingleLineComment; 231 // same for --> 232 } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') { 233 shift(2); 234 m_state = InSingleLineComment; 235 } else { 236 token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3); 237 if (token != -1) 238 setDone(Other); 239 else 240 setDone(Bad); 241 } 242 break; 243 case InString: 244 if (m_current == stringType) { 245 shift(1); 246 setDone(String); 247 } else if (isLineTerminator() || m_current == -1) 248 setDone(Bad); 249 else if (m_current == '\\') 250 m_state = InEscapeSequence; 251 else 252 record16(m_current); 253 break; 254 // Escape Sequences inside of strings 255 case InEscapeSequence: 256 if (isOctalDigit(m_current)) { 257 if (m_current >= '0' && m_current <= '3' && 258 isOctalDigit(m_next1) && isOctalDigit(m_next2)) { 259 record16(convertOctal(m_current, m_next1, m_next2)); 260 shift(2); 261 m_state = InString; 262 } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) { 263 record16(convertOctal('0', m_current, m_next1)); 264 shift(1); 265 m_state = InString; 266 } else if (isOctalDigit(m_current)) { 267 record16(convertOctal('0', '0', m_current)); 268 m_state = InString; 269 } else 270 setDone(Bad); 271 } else if (m_current == 'x') 272 m_state = InHexEscape; 273 else if (m_current == 'u') 274 m_state = InUnicodeEscape; 275 else if (isLineTerminator()) { 276 nextLine(); 277 m_state = InString; 278 } else { 279 record16(singleEscape(static_cast<unsigned short>(m_current))); 280 m_state = InString; 281 } 282 break; 283 case InHexEscape: 284 if (isHexDigit(m_current) && isHexDigit(m_next1)) { 285 m_state = InString; 286 record16(convertHex(m_current, m_next1)); 287 shift(1); 288 } else if (m_current == stringType) { 289 record16('x'); 290 shift(1); 291 setDone(String); 292 } else { 293 record16('x'); 294 record16(m_current); 295 m_state = InString; 296 } 297 break; 298 case 
InUnicodeEscape: 299 if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) { 300 record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); 301 shift(3); 302 m_state = InString; 303 } else if (m_current == stringType) { 304 record16('u'); 305 shift(1); 306 setDone(String); 307 } else 308 setDone(Bad); 309 break; 310 case InSingleLineComment: 311 if (isLineTerminator()) { 312 nextLine(); 313 m_terminator = true; 314 if (m_restrKeyword) { 315 token = ';'; 316 setDone(Other); 317 } else 318 m_state = Start; 319 } else if (m_current == -1) 320 setDone(Eof); 321 break; 322 case InMultiLineComment: 323 if (m_current == -1) 324 setDone(Bad); 325 else if (isLineTerminator()) 326 nextLine(); 327 else if (m_current == '*' && m_next1 == '/') { 328 m_state = Start; 329 shift(1); 330 } 331 break; 332 case InIdentifierOrKeyword: 333 case InIdentifier: 334 if (isIdentPart(m_current)) 335 record16(m_current); 336 else if (m_current == '\\') 337 m_state = InIdentifierPartUnicodeEscapeStart; 338 else 339 setDone(m_state == InIdentifierOrKeyword ? 
IdentifierOrKeyword : Identifier); 340 break; 341 case InNum0: 342 if (m_current == 'x' || m_current == 'X') { 343 record8(m_current); 344 m_state = InHex; 345 } else if (m_current == '.') { 346 record8(m_current); 347 m_state = InDecimal; 348 } else if (m_current == 'e' || m_current == 'E') { 349 record8(m_current); 350 m_state = InExponentIndicator; 351 } else if (isOctalDigit(m_current)) { 352 record8(m_current); 353 m_state = InOctal; 354 } else if (isDecimalDigit(m_current)) { 355 record8(m_current); 356 m_state = InDecimal; 357 } else 358 setDone(Number); 359 break; 360 case InHex: 361 if (isHexDigit(m_current)) 362 record8(m_current); 363 else 364 setDone(Hex); 365 break; 366 case InOctal: 367 if (isOctalDigit(m_current)) 368 record8(m_current); 369 else if (isDecimalDigit(m_current)) { 370 record8(m_current); 371 m_state = InDecimal; 372 } else 373 setDone(Octal); 374 break; 375 case InNum: 376 if (isDecimalDigit(m_current)) 377 record8(m_current); 378 else if (m_current == '.') { 379 record8(m_current); 380 m_state = InDecimal; 381 } else if (m_current == 'e' || m_current == 'E') { 382 record8(m_current); 383 m_state = InExponentIndicator; 384 } else 385 setDone(Number); 386 break; 387 case InDecimal: 388 if (isDecimalDigit(m_current)) 389 record8(m_current); 390 else if (m_current == 'e' || m_current == 'E') { 391 record8(m_current); 392 m_state = InExponentIndicator; 393 } else 394 setDone(Number); 395 break; 396 case InExponentIndicator: 397 if (m_current == '+' || m_current == '-') 398 record8(m_current); 399 else if (isDecimalDigit(m_current)) { 400 record8(m_current); 401 m_state = InExponent; 402 } else 403 setDone(Bad); 404 break; 405 case InExponent: 406 if (isDecimalDigit(m_current)) 407 record8(m_current); 408 else 409 setDone(Number); 410 break; 411 case InIdentifierStartUnicodeEscapeStart: 412 if (m_current == 'u') 413 m_state = InIdentifierStartUnicodeEscape; 414 else 415 setDone(Bad); 416 break; 417 case InIdentifierPartUnicodeEscapeStart: 
418 if (m_current == 'u') 419 m_state = InIdentifierPartUnicodeEscape; 420 else 421 setDone(Bad); 422 break; 423 case InIdentifierStartUnicodeEscape: 424 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) { 425 setDone(Bad); 426 break; 427 } 428 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 429 shift(3); 430 if (!isIdentStart(token)) { 431 setDone(Bad); 432 break; 433 } 434 record16(token); 435 m_state = InIdentifier; 436 break; 437 case InIdentifierPartUnicodeEscape: 438 if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) { 439 setDone(Bad); 440 break; 441 } 442 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 443 shift(3); 444 if (!isIdentPart(token)) { 445 setDone(Bad); 446 break; 447 } 448 record16(token); 449 m_state = InIdentifier; 450 break; 451 default: 452 ASSERT(!"Unhandled state in switch statement"); 453 } 454 455 if (m_state != Start && m_state != InSingleLineComment) 456 m_atLineStart = false; 457 if (m_done) 458 break; 459 460 shift(1); 461 } 462 } 463 464 // no identifiers allowed directly after numeric literal, e.g. 
"3in" is bad 465 if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current)) 466 m_state = Bad; 467 468 // terminate string 469 m_buffer8.append('\0'); 470 471 #ifdef JSC_DEBUG_LEX 472 fprintf(stderr, "line: %d ", lineNo()); 473 fprintf(stderr, "yytext (%x): ", m_buffer8[0]); 474 fprintf(stderr, "%s ", m_buffer8.data()); 475 #endif 476 477 double dval = 0; 478 if (m_state == Number) 479 dval = WTF::strtod(m_buffer8.data(), 0L); 480 else if (m_state == Hex) { // scan hex numbers 481 const char* p = m_buffer8.data() + 2; 482 while (char c = *p++) { 483 dval *= 16; 484 dval += convertHex(c); 485 } 486 487 if (dval >= mantissaOverflowLowerBound) 488 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); 489 490 m_state = Number; 491 } else if (m_state == Octal) { // scan octal number 492 const char* p = m_buffer8.data() + 1; 493 while (char c = *p++) { 494 dval *= 8; 495 dval += c - '0'; 496 } 497 498 if (dval >= mantissaOverflowLowerBound) 499 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); 500 501 m_state = Number; 502 } 503 504 #ifdef JSC_DEBUG_LEX 505 switch (m_state) { 506 case Eof: 507 printf("(EOF)\n"); 508 break; 509 case Other: 510 printf("(Other)\n"); 511 break; 512 case Identifier: 513 printf("(Identifier)/(Keyword)\n"); 514 break; 515 case String: 516 printf("(String)\n"); 517 break; 518 case Number: 519 printf("(Number)\n"); 520 break; 521 default: 522 printf("(unknown)"); 523 } 524 #endif 525 526 if (m_state != Identifier) 527 m_eatNextIdentifier = false; 528 529 m_restrKeyword = false; 530 m_delimited = false; 531 llocp->first_line = yylineno; 532 llocp->last_line = yylineno; 533 llocp->first_column = startOffset; 534 llocp->last_column = m_currentOffset; 535 switch (m_state) { 536 case Eof: 537 token = 0; 538 break; 539 case Other: 540 if (token == '}' || token == ';') 541 m_delimited = true; 542 break; 543 case Identifier: 544 // Apply anonymous-function hack below (eat 
the identifier). 545 if (m_eatNextIdentifier) { 546 m_eatNextIdentifier = false; 547 token = lex(lvalp, llocp); 548 break; 549 } 550 lvalp->ident = makeIdentifier(m_buffer16); 551 token = IDENT; 552 break; 553 case IdentifierOrKeyword: { 554 lvalp->ident = makeIdentifier(m_buffer16); 555 const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident); 556 if (!entry) { 557 // Lookup for keyword failed, means this is an identifier. 558 token = IDENT; 559 break; 560 } 561 token = entry->lexerValue(); 562 // Hack for "f = function somename() { ... }"; too hard to get into the grammar. 563 m_eatNextIdentifier = token == FUNCTION && m_lastToken == '='; 564 if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW) 565 m_restrKeyword = true; 566 break; 567 } 568 case String: 569 // Atomize constant strings in case they're later used in property lookup. 570 lvalp->ident = makeIdentifier(m_buffer16); 571 token = STRING; 572 break; 573 case Number: 574 lvalp->doubleValue = dval; 575 token = NUMBER; 576 break; 577 case Bad: 578 #ifdef JSC_DEBUG_LEX 579 fprintf(stderr, "yylex: ERROR.\n"); 580 #endif 581 m_error = true; 582 return -1; 583 default: 584 ASSERT(!"unhandled numeration value in switch"); 585 m_error = true; 586 return -1; 587 } 588 m_lastToken = token; 589 return token; 590 } 591 592 bool Lexer::isWhiteSpace() const 593 { 594 return isWhiteSpace(m_current); 595 } 596 597 bool Lexer::isLineTerminator() 598 { 599 bool cr = (m_current == '\r'); 600 bool lf = (m_current == '\n'); 601 if (cr) 602 m_skipLF = true; 603 else if (lf) 604 m_skipCR = true; 605 return cr || lf || m_current == 0x2028 || m_current == 0x2029; 606 } 607 608 bool Lexer::isIdentStart(int c) 609 { 610 return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other))); 611 } 612 613 bool Lexer::isIdentPart(int c) 614 { 615 return isASCIIAlphanumeric(c) || c == '$' || c 
== '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other 616 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector))); 617 } 618 619 static bool isDecimalDigit(int c) 620 { 621 return isASCIIDigit(c); 622 } 623 624 bool Lexer::isHexDigit(int c) 625 { 626 return isASCIIHexDigit(c); 627 } 628 629 bool Lexer::isOctalDigit(int c) 630 { 631 return isASCIIOctalDigit(c); 632 } 633 634 int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4) 635 { 636 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { 637 shift(4); 638 return URSHIFTEQUAL; 639 } 640 if (c1 == '=' && c2 == '=' && c3 == '=') { 641 shift(3); 642 return STREQ; 643 } 644 if (c1 == '!' && c2 == '=' && c3 == '=') { 645 shift(3); 646 return STRNEQ; 647 } 648 if (c1 == '>' && c2 == '>' && c3 == '>') { 649 shift(3); 650 return URSHIFT; 651 } 652 if (c1 == '<' && c2 == '<' && c3 == '=') { 653 shift(3); 654 return LSHIFTEQUAL; 655 } 656 if (c1 == '>' && c2 == '>' && c3 == '=') { 657 shift(3); 658 return RSHIFTEQUAL; 659 } 660 if (c1 == '<' && c2 == '=') { 661 shift(2); 662 return LE; 663 } 664 if (c1 == '>' && c2 == '=') { 665 shift(2); 666 return GE; 667 } 668 if (c1 == '!' 
&& c2 == '=') { 669 shift(2); 670 return NE; 671 } 672 if (c1 == '+' && c2 == '+') { 673 shift(2); 674 if (m_terminator) 675 return AUTOPLUSPLUS; 676 return PLUSPLUS; 677 } 678 if (c1 == '-' && c2 == '-') { 679 shift(2); 680 if (m_terminator) 681 return AUTOMINUSMINUS; 682 return MINUSMINUS; 683 } 684 if (c1 == '=' && c2 == '=') { 685 shift(2); 686 return EQEQ; 687 } 688 if (c1 == '+' && c2 == '=') { 689 shift(2); 690 return PLUSEQUAL; 691 } 692 if (c1 == '-' && c2 == '=') { 693 shift(2); 694 return MINUSEQUAL; 695 } 696 if (c1 == '*' && c2 == '=') { 697 shift(2); 698 return MULTEQUAL; 699 } 700 if (c1 == '/' && c2 == '=') { 701 shift(2); 702 return DIVEQUAL; 703 } 704 if (c1 == '&' && c2 == '=') { 705 shift(2); 706 return ANDEQUAL; 707 } 708 if (c1 == '^' && c2 == '=') { 709 shift(2); 710 return XOREQUAL; 711 } 712 if (c1 == '%' && c2 == '=') { 713 shift(2); 714 return MODEQUAL; 715 } 716 if (c1 == '|' && c2 == '=') { 717 shift(2); 718 return OREQUAL; 719 } 720 if (c1 == '<' && c2 == '<') { 721 shift(2); 722 return LSHIFT; 723 } 724 if (c1 == '>' && c2 == '>') { 725 shift(2); 726 return RSHIFT; 727 } 728 if (c1 == '&' && c2 == '&') { 729 shift(2); 730 return AND; 731 } 732 if (c1 == '|' && c2 == '|') { 733 shift(2); 734 return OR; 735 } 736 737 switch (c1) { 209 ALWAYS_INLINE JSC::Identifier* Lexer::makeIdentifier(const Vector<UChar>& buffer) 210 { 211 m_identifiers.append(JSC::Identifier(m_globalData, buffer.data(), buffer.size())); 212 return &m_identifiers.last(); 213 } 214 215 ALWAYS_INLINE int Lexer::matchPunctuator(int& charPos) 216 { 217 switch (m_current) { 218 case '>': 219 if (m_next1 == '>' && m_next2 == '>') { 220 if (m_next3 == '=') { 221 shift4(); 222 return URSHIFTEQUAL; 223 } 224 shift3(); 225 return URSHIFT; 226 } 227 if (m_next1 == '>') { 228 if (m_next2 == '=') { 229 shift3(); 230 return RSHIFTEQUAL; 231 } 232 shift2(); 233 return RSHIFT; 234 } 235 if (m_next1 == '=') { 236 shift2(); 237 return GE; 238 } 239 shift1(); 240 return '>'; 738 241 
case '=': 739 case '>': 242 if (m_next1 == '=') { 243 if (m_next2 == '=') { 244 shift3(); 245 return STREQ; 246 } 247 shift2(); 248 return EQEQ; 249 } 250 shift1(); 251 return '='; 252 case '!': 253 if (m_next1 == '=') { 254 if (m_next2 == '=') { 255 shift3(); 256 return STRNEQ; 257 } 258 shift2(); 259 return NE; 260 } 261 shift1(); 262 return '!'; 740 263 case '<': 264 if (m_next1 == '<') { 265 if (m_next2 == '=') { 266 shift3(); 267 return LSHIFTEQUAL; 268 } 269 shift2(); 270 return LSHIFT; 271 } 272 if (m_next1 == '=') { 273 shift2(); 274 return LE; 275 } 276 shift1(); 277 return '<'; 278 case '+': 279 if (m_next1 == '+') { 280 shift2(); 281 if (m_terminator) 282 return AUTOPLUSPLUS; 283 return PLUSPLUS; 284 } 285 if (m_next1 == '=') { 286 shift2(); 287 return PLUSEQUAL; 288 } 289 shift1(); 290 return '+'; 291 case '-': 292 if (m_next1 == '-') { 293 shift2(); 294 if (m_terminator) 295 return AUTOMINUSMINUS; 296 return MINUSMINUS; 297 } 298 if (m_next1 == '=') { 299 shift2(); 300 return MINUSEQUAL; 301 } 302 shift1(); 303 return '-'; 304 case '*': 305 if (m_next1 == '=') { 306 shift2(); 307 return MULTEQUAL; 308 } 309 shift1(); 310 return '*'; 311 case '/': 312 if (m_next1 == '=') { 313 shift2(); 314 return DIVEQUAL; 315 } 316 shift1(); 317 return '/'; 318 case '&': 319 if (m_next1 == '&') { 320 shift2(); 321 return AND; 322 } 323 if (m_next1 == '=') { 324 shift2(); 325 return ANDEQUAL; 326 } 327 shift1(); 328 return '&'; 329 case '^': 330 if (m_next1 == '=') { 331 shift2(); 332 return XOREQUAL; 333 } 334 shift1(); 335 return '^'; 336 case '%': 337 if (m_next1 == '=') { 338 shift2(); 339 return MODEQUAL; 340 } 341 shift1(); 342 return '%'; 343 case '|': 344 if (m_next1 == '=') { 345 shift2(); 346 return OREQUAL; 347 } 348 if (m_next1 == '|') { 349 shift2(); 350 return OR; 351 } 352 shift1(); 353 return '|'; 741 354 case ',': 742 case '!': 355 shift1(); 356 return ','; 743 357 case '~': 358 shift1(); 359 return '~'; 744 360 case '?': 361 shift1(); 362 return '?'; 
745 363 case ':': 364 shift1(); 365 return ':'; 746 366 case '.': 747 case '+': 748 case '-': 749 case '*': 750 case '/': 751 case '&': 752 case '|': 753 case '^': 754 case '%': 367 shift1(); 368 return '.'; 755 369 case '(': 370 shift1(); 371 return '('; 756 372 case ')': 373 shift1(); 374 return ')'; 757 375 case '[': 376 shift1(); 377 return '['; 758 378 case ']': 379 shift1(); 380 return ']'; 759 381 case ';': 760 shift (1);761 return static_cast<int>(c1);382 shift1(); 383 return ';'; 762 384 case '{': 763 charPos = m_currentOffset;764 shift (1);385 charPos = currentOffset(); 386 shift1(); 765 387 return OPENBRACE; 766 388 case '}': 767 charPos = m_currentOffset;768 shift (1);389 charPos = currentOffset(); 390 shift1(); 769 391 return CLOSEBRACE; 770 default: 771 return -1; 772 } 773 } 774 775 unsigned short Lexer::singleEscape(unsigned short c) 392 } 393 394 return -1; 395 } 396 397 ALWAYS_INLINE bool Lexer::isLineTerminator() 398 { 399 bool cr = m_current == '\r'; 400 bool lf = m_current == '\n'; 401 m_skipLineEnd |= (cr << SkipLFShift) | (lf << SkipCRShift); 402 return cr | lf | ((m_current & ~1) == 0x2028); 403 } 404 405 inline bool Lexer::lastTokenWasRestrKeyword() const 406 { 407 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; 408 } 409 410 static NEVER_INLINE bool isNonASCIIIdentStart(int c) 411 { 412 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other); 413 } 414 415 static inline bool isIdentStart(int c) 416 { 417 return isASCII(c) ? 
isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c); 418 } 419 420 static NEVER_INLINE bool isNonASCIIIdentPart(int c) 421 { 422 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other 423 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector); 424 } 425 426 static inline bool isIdentPart(int c) 427 { 428 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c); 429 } 430 431 static int singleEscape(int c) 776 432 { 777 433 switch (c) { … … 799 455 } 800 456 801 unsigned short Lexer::convertOctal(int c1, int c2, int c3) 802 { 803 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); 804 } 805 806 unsigned char Lexer::convertHex(int c) 807 { 808 if (c >= '0' && c <= '9') 809 return static_cast<unsigned char>(c - '0'); 810 if (c >= 'a' && c <= 'f') 811 return static_cast<unsigned char>(c - 'a' + 10); 812 return static_cast<unsigned char>(c - 'A' + 10); 813 } 814 815 unsigned char Lexer::convertHex(int c1, int c2) 816 { 817 return ((convertHex(c1) << 4) + convertHex(c2)); 818 } 819 820 UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) 821 { 822 unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2); 823 unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4); 824 return (highByte << 8 | lowByte); 825 } 826 827 void Lexer::record8(int c) 457 static inline int convertOctal(int c1, int c2, int c3) 458 { 459 return (c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'; 460 } 461 462 inline void Lexer::record8(int c) 828 463 { 829 464 ASSERT(c >= 0); 830 ASSERT(c <= 0x ff);465 ASSERT(c <= 0xFF); 831 466 m_buffer8.append(static_cast<char>(c)); 832 467 } 833 468 834 void Lexer::record16(int c)469 inline void Lexer::record16(int c) 835 470 { 836 471 ASSERT(c >= 0); … … 839 474 } 840 475 841 void Lexer::record16(UChar c)476 inline void Lexer::record16(UChar c) 842 477 { 843 478 m_buffer16.append(c); 
479 } 480 481 int Lexer::lex(void* p1, void* p2) 482 { 483 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); 484 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); 485 int token = 0; 486 m_state = Start; 487 int stringType = 0; // either single or double quotes 488 m_buffer8.resize(0); 489 m_buffer16.resize(0); 490 m_done = false; 491 m_terminator = false; 492 m_skipLineEnd = 0; 493 494 int startOffset = currentOffset(); 495 496 while (true) { 497 if (m_skipLineEnd) { 498 if (m_current != '\n') // found \r but not \n afterwards 499 m_skipLineEnd &= ~SkipLF; 500 if (m_current != '\r') // found \n but not \r afterwards 501 m_skipLineEnd &= ~SkipCR; 502 if (m_skipLineEnd) { // found \r\n or \n\r -> eat the second one 503 m_skipLineEnd = 0; 504 shift1(); 505 } 506 } 507 switch (m_state) { 508 case Start: 509 startOffset = currentOffset(); 510 if (isWhiteSpace(m_current)) { 511 // do nothing 512 } else if (m_current == '/' && m_next1 == '/') { 513 shift1(); 514 m_state = InSingleLineComment; 515 } else if (m_current == '/' && m_next1 == '*') { 516 shift1(); 517 m_state = InMultiLineComment; 518 } else if (m_current == -1) { 519 if (!m_terminator && !m_delimited && !m_isReparsing) { 520 // automatic semicolon insertion if program incomplete 521 token = ';'; 522 setDone(Other); 523 } else 524 setDone(Eof); 525 } else if (isLineTerminator()) { 526 nextLine(); 527 m_terminator = true; 528 if (lastTokenWasRestrKeyword()) { 529 token = ';'; 530 setDone(Other); 531 } 532 } else if (m_current == '"' || m_current == '\'') { 533 m_state = InString; 534 stringType = m_current; 535 } else if (isIdentStart(m_current)) { 536 record16(m_current); 537 m_state = InIdentifierOrKeyword; 538 } else if (m_current == '\\') 539 m_state = InIdentifierStartUnicodeEscapeStart; 540 else if (m_current == '0') { 541 record8(m_current); 542 m_state = InNum0; 543 } else if (isASCIIDigit(m_current)) { 544 record8(m_current); 545 m_state = InNum; 546 } else if (m_current == '.' 
&& isASCIIDigit(m_next1)) { 547 record8(m_current); 548 m_state = InDecimal; 549 } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { 550 // <!-- marks the beginning of a line comment (for www usage) 551 shift3(); 552 m_state = InSingleLineComment; 553 } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') { 554 // same for --> 555 shift2(); 556 m_state = InSingleLineComment; 557 } else { 558 token = matchPunctuator(lvalp->intValue); 559 if (token != -1) 560 setDone(Other); 561 else 562 setDone(Bad); 563 } 564 goto stillAtLineStart; 565 case InString: 566 if (m_current == stringType) { 567 shift1(); 568 setDone(String); 569 } else if (isLineTerminator() || m_current == -1) 570 setDone(Bad); 571 else if (m_current == '\\') 572 m_state = InEscapeSequence; 573 else 574 record16(m_current); 575 break; 576 // Escape Sequences inside of strings 577 case InEscapeSequence: 578 if (isASCIIOctalDigit(m_current)) { 579 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) { 580 record16(convertOctal(m_current, m_next1, m_next2)); 581 shift2(); 582 m_state = InString; 583 } else if (isASCIIOctalDigit(m_current) && isASCIIOctalDigit(m_next1)) { 584 record16(convertOctal('0', m_current, m_next1)); 585 shift1(); 586 m_state = InString; 587 } else if (isASCIIOctalDigit(m_current)) { 588 record16(convertOctal('0', '0', m_current)); 589 m_state = InString; 590 } else 591 setDone(Bad); 592 } else if (m_current == 'x') 593 m_state = InHexEscape; 594 else if (m_current == 'u') 595 m_state = InUnicodeEscape; 596 else if (isLineTerminator()) { 597 nextLine(); 598 m_state = InString; 599 } else { 600 record16(singleEscape(m_current)); 601 m_state = InString; 602 } 603 break; 604 case InHexEscape: 605 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) { 606 m_state = InString; 607 record16(convertHex(m_current, m_next1)); 608 shift1(); 609 } else if (m_current == stringType) 
{ 610 record16('x'); 611 shift1(); 612 setDone(String); 613 } else { 614 record16('x'); 615 record16(m_current); 616 m_state = InString; 617 } 618 break; 619 case InUnicodeEscape: 620 if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) { 621 record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); 622 shift3(); 623 m_state = InString; 624 } else if (m_current == stringType) { 625 record16('u'); 626 shift1(); 627 setDone(String); 628 } else 629 setDone(Bad); 630 break; 631 case InSingleLineComment: 632 if (isLineTerminator()) { 633 nextLine(); 634 m_terminator = true; 635 if (lastTokenWasRestrKeyword()) { 636 token = ';'; 637 setDone(Other); 638 } else 639 m_state = Start; 640 } else if (m_current == -1) 641 setDone(Eof); 642 goto stillAtLineStart; 643 case InMultiLineComment: 644 if (isLineTerminator()) 645 nextLine(); 646 else if (m_current == '*' && m_next1 == '/') { 647 m_state = Start; 648 shift1(); 649 } else if (m_current == -1) 650 setDone(Bad); 651 break; 652 case InIdentifierOrKeyword: 653 if (isIdentPart(m_current)) { 654 record16(m_current); 655 while (isIdentPart(m_next1)) { 656 shift1(); 657 record16(m_current); 658 } 659 } else if (m_current == '\\') 660 m_state = InIdentifierPartUnicodeEscapeStart; 661 else 662 setDone(IdentifierOrKeyword); 663 break; 664 case InIdentifier: 665 if (isIdentPart(m_current)) { 666 record16(m_current); 667 while (isIdentPart(m_next1)) { 668 shift1(); 669 record16(m_current); 670 } 671 } else if (m_current == '\\') 672 m_state = InIdentifierPartUnicodeEscapeStart; 673 else 674 setDone(Identifier); 675 break; 676 case InNum0: 677 if (m_current == 'x' || m_current == 'X') { 678 record8(m_current); 679 m_state = InHex; 680 } else if (m_current == '.') { 681 record8(m_current); 682 m_state = InDecimal; 683 } else if (m_current == 'e' || m_current == 'E') { 684 record8(m_current); 685 m_state = InExponentIndicator; 686 } else if 
(isASCIIOctalDigit(m_current)) { 687 record8(m_current); 688 m_state = InOctal; 689 } else if (isASCIIDigit(m_current)) { 690 record8(m_current); 691 m_state = InDecimal; 692 } else 693 setDone(Number); 694 break; 695 case InHex: 696 if (isASCIIHexDigit(m_current)) { 697 record8(m_current); 698 while (isASCIIHexDigit(m_next1)) { 699 shift1(); 700 record8(m_current); 701 } 702 } else 703 setDone(Hex); 704 break; 705 case InOctal: 706 if (isASCIIOctalDigit(m_current)) { 707 record8(m_current); 708 while (isASCIIOctalDigit(m_next1)) { 709 shift1(); 710 record8(m_current); 711 } 712 } else if (isASCIIDigit(m_current)) { 713 record8(m_current); 714 m_state = InDecimal; 715 } else 716 setDone(Octal); 717 break; 718 case InNum: 719 if (isASCIIDigit(m_current)) { 720 record8(m_current); 721 while (isASCIIDigit(m_next1)) { 722 shift1(); 723 record8(m_current); 724 } 725 } else if (m_current == '.') { 726 record8(m_current); 727 m_state = InDecimal; 728 } else if (m_current == 'e' || m_current == 'E') { 729 record8(m_current); 730 m_state = InExponentIndicator; 731 } else 732 setDone(Number); 733 break; 734 case InDecimal: 735 if (isASCIIDigit(m_current)) { 736 record8(m_current); 737 while (isASCIIDigit(m_next1)) { 738 shift1(); 739 record8(m_current); 740 } 741 } else if (m_current == 'e' || m_current == 'E') { 742 record8(m_current); 743 m_state = InExponentIndicator; 744 } else 745 setDone(Number); 746 break; 747 case InExponentIndicator: 748 if (m_current == '+' || m_current == '-') 749 record8(m_current); 750 else if (isASCIIDigit(m_current)) { 751 record8(m_current); 752 m_state = InExponent; 753 } else 754 setDone(Bad); 755 break; 756 case InExponent: 757 if (isASCIIDigit(m_current)) 758 record8(m_current); 759 else 760 setDone(Number); 761 break; 762 case InIdentifierStartUnicodeEscapeStart: 763 if (m_current == 'u') 764 m_state = InIdentifierStartUnicodeEscape; 765 else 766 setDone(Bad); 767 break; 768 case InIdentifierPartUnicodeEscapeStart: 769 if (m_current == 
'u') 770 m_state = InIdentifierPartUnicodeEscape; 771 else 772 setDone(Bad); 773 break; 774 case InIdentifierStartUnicodeEscape: 775 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)) { 776 setDone(Bad); 777 break; 778 } 779 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 780 shift3(); 781 if (!isIdentStart(token)) { 782 setDone(Bad); 783 break; 784 } 785 record16(token); 786 m_state = InIdentifier; 787 break; 788 case InIdentifierPartUnicodeEscape: 789 if (!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)) { 790 setDone(Bad); 791 break; 792 } 793 token = convertUnicode(m_current, m_next1, m_next2, m_next3); 794 shift3(); 795 if (!isIdentPart(token)) { 796 setDone(Bad); 797 break; 798 } 799 record16(token); 800 m_state = InIdentifier; 801 break; 802 default: 803 ASSERT_NOT_REACHED(); 804 } 805 806 m_atLineStart = false; 807 808 stillAtLineStart: 809 if (m_done) 810 break; 811 812 shift1(); 813 } 814 815 if (m_state == Number || m_state == Octal || m_state == Hex) { 816 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. 
817 if (isIdentStart(m_current)) 818 m_state = Bad; 819 else { 820 // terminate string 821 m_buffer8.append('\0'); 822 823 if (m_state == Number) 824 lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0L); 825 else if (m_state == Hex) { // scan hex numbers 826 double dval = 0; 827 828 const char* p = m_buffer8.data() + 2; 829 while (char c = *p++) { 830 dval *= 16; 831 dval += toASCIIHexValue(c); 832 } 833 834 if (dval >= mantissaOverflowLowerBound) 835 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); 836 837 m_state = Number; 838 lvalp->doubleValue = dval; 839 } else { // scan octal number 840 double dval = 0; 841 842 const char* p = m_buffer8.data() + 1; 843 while (char c = *p++) { 844 dval *= 8; 845 dval += c - '0'; 846 } 847 848 if (dval >= mantissaOverflowLowerBound) 849 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); 850 851 m_state = Number; 852 lvalp->doubleValue = dval; 853 } 854 } 855 } 856 857 m_delimited = false; 858 859 int lineNumber = m_lineNumber; 860 llocp->first_line = lineNumber; 861 llocp->last_line = lineNumber; 862 llocp->first_column = startOffset; 863 llocp->last_column = currentOffset(); 864 865 switch (m_state) { 866 case Eof: 867 token = 0; 868 break; 869 case Other: 870 m_delimited = (token == '}') | (token == ';'); 871 break; 872 case Identifier: 873 lvalp->ident = makeIdentifier(m_buffer16); 874 token = IDENT; 875 break; 876 case IdentifierOrKeyword: { 877 lvalp->ident = makeIdentifier(m_buffer16); 878 const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident); 879 if (!entry) { 880 // Lookup for keyword failed, means this is an identifier. 881 token = IDENT; 882 break; 883 } 884 token = entry->lexerValue(); 885 break; 886 } 887 case String: 888 // Atomize constant strings in case they're later used in property lookup. 
889 lvalp->ident = makeIdentifier(m_buffer16); 890 token = STRING; 891 break; 892 case Number: 893 token = NUMBER; 894 break; 895 default: 896 ASSERT_NOT_REACHED(); 897 // Fall through. 898 case Bad: 899 m_error = true; 900 return -1; 901 } 902 903 m_lastToken = token; 904 return token; 844 905 } 845 906 … … 853 914 if (isLineTerminator() || m_current == -1) 854 915 return false; 855 else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {916 else if (m_current != '/' || lastWasEscape || inBrackets) { 856 917 // keep track of '[' and ']' 857 918 if (!lastWasEscape) { 858 if ( m_current == '[' && !inBrackets)919 if (m_current == '[' && !inBrackets) 859 920 inBrackets = true; 860 if ( m_current == ']' && inBrackets)921 if (m_current == ']' && inBrackets) 861 922 inBrackets = false; 862 923 } 863 924 record16(m_current); 864 lastWasEscape = 865 !lastWasEscape && (m_current == '\\'); 925 lastWasEscape = !lastWasEscape && m_current == '\\'; 866 926 } else { // end of regexp 867 927 m_pattern = UString(m_buffer16); 868 928 m_buffer16.resize(0); 869 shift (1);929 shift1(); 870 930 break; 871 931 } 872 shift (1);932 shift1(); 873 933 } 874 934 875 935 while (isIdentPart(m_current)) { 876 936 record16(m_current); 877 shift (1);937 shift1(); 878 938 } 879 939 m_flags = UString(m_buffer16); … … 885 945 { 886 946 m_identifiers.clear(); 947 m_codeWithoutBOMs.clear(); 887 948 888 949 Vector<char> newBuffer8; … … 896 957 m_isReparsing = false; 897 958 898 m_pattern = 0; 899 m_flags = 0; 959 m_pattern = UString(); 960 m_flags = UString(); 961 } 962 963 /* Lexer::sourceCode: builds a SourceCode from the provider of m_source for the span openBrace..closeBrace; the end offset handed on is closeBrace + 1 and firstLine is forwarded unchanged. When m_codeWithoutBOMs is empty the two offsets are passed through as they are. Otherwise the provider data is scanned for byteOrderMark characters: those found at indices from m_source->startOffset() up to openBrace are added to both offsets, and those found from openBrace up to closeBrace are added to the end offset only. NOTE(review): both loops use the unadjusted openBrace and closeBrace as bounds while indexing the provider data; if these are offsets into BOM-stripped text (presumably, given the m_codeWithoutBOMs check), BOMs lying between the unadjusted and the adjusted positions would not be counted. Confirm against the code that fills m_codeWithoutBOMs. */ SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) 964 { 965 if (m_codeWithoutBOMs.isEmpty()) 966 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); 967 968 const UChar* data = m_source->provider()->data(); 969 970 ASSERT(openBrace < closeBrace); 971 972 int numBOMsBeforeOpenBrace = 0; 973 int numBOMsBetweenBraces = 0; 974 975 int i; 976 for (i = 
m_source->startOffset(); i < openBrace; ++i) 977 numBOMsBeforeOpenBrace += data[i] == byteOrderMark; 978 for (; i < closeBrace; ++i) 979 numBOMsBetweenBraces += data[i] == byteOrderMark; 980 981 return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace, 982 closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine); 900 983 } 901 984
Note:
See TracChangeset
for help on using the changeset viewer.