Changeset 197426 in webkit for trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp
- Timestamp: Mar 1, 2016, 4:39:01 PM (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp
r194496 r197426 1 1 /* 2 * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.2 * Copyright (C) 2009, 2013-2016 Apple Inc. All rights reserved. 3 3 * Copyright (C) 2010 Peter Varga ([email protected]), University of Szeged 4 4 * … … 29 29 30 30 #include "Yarr.h" 31 #include "YarrCanonicalizeU CS2.h"31 #include "YarrCanonicalizeUnicode.h" 32 32 #include "YarrParser.h" 33 33 #include <wtf/Vector.h> … … 41 41 class CharacterClassConstructor { 42 42 public: 43 CharacterClassConstructor(bool isCaseInsensitive = false)43 CharacterClassConstructor(bool isCaseInsensitive, CanonicalMode canonicalMode) 44 44 : m_isCaseInsensitive(isCaseInsensitive) 45 , m_canonicalMode(canonicalMode) 45 46 { 46 47 } … … 66 67 } 67 68 68 void putChar(UChar ch)69 void putChar(UChar32 ch) 69 70 { 70 71 // Handle ascii cases. … … 85 86 86 87 // Add multiple matches, if necessary. 87 const UCS2CanonicalizationRange* info = rangeInfoFor(ch);88 const CanonicalizationRange* info = canonicalRangeInfoFor(ch, m_canonicalMode); 88 89 if (info->type == CanonicalizeUnique) 89 90 addSorted(m_matchesUnicode, ch); … … 92 93 } 93 94 94 void putUnicodeIgnoreCase(UChar ch, const UCS2CanonicalizationRange* info)95 void putUnicodeIgnoreCase(UChar32 ch, const CanonicalizationRange* info) 95 96 { 96 97 ASSERT(m_isCaseInsensitive); 97 ASSERT(ch > 0x7f);98 98 ASSERT(ch >= info->begin && ch <= info->end); 99 99 ASSERT(info->type != CanonicalizeUnique); 100 100 if (info->type == CanonicalizeSet) { 101 for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)102 addSorted( m_matchesUnicode,ch);101 for (const UChar32* set = canonicalCharacterSetInfo(info->value, m_canonicalMode); (ch = *set); ++set) 102 addSorted(ch); 103 103 } else { 104 addSorted( m_matchesUnicode,ch);105 addSorted( m_matchesUnicode,getCanonicalPair(info, ch));106 } 107 } 108 109 void putRange(UChar lo, UCharhi)104 addSorted(ch); 105 addSorted(getCanonicalPair(info, ch)); 106 } 107 } 108 109 void putRange(UChar32 lo, UChar32 
hi) 110 110 { 111 111 if (lo <= 0x7f) { 112 112 char asciiLo = lo; 113 char asciiHi = std::min(hi, (UChar )0x7f);113 char asciiHi = std::min(hi, (UChar32)0x7f); 114 114 addSortedRange(m_ranges, lo, asciiHi); 115 115 … … 124 124 return; 125 125 126 lo = std::max(lo, (UChar )0x80);126 lo = std::max(lo, (UChar32)0x80); 127 127 addSortedRange(m_rangesUnicode, lo, hi); 128 128 … … 130 130 return; 131 131 132 const UCS2CanonicalizationRange* info = rangeInfoFor(lo);132 const CanonicalizationRange* info = canonicalRangeInfoFor(lo, m_canonicalMode); 133 133 while (true) { 134 134 // Handle the range [lo .. end] 135 UChar end = std::min<UChar>(info->end, hi);135 UChar32 end = std::min<UChar32>(info->end, hi); 136 136 137 137 switch (info->type) { … … 141 141 case CanonicalizeSet: { 142 142 UChar ch; 143 for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)143 for (const UChar32* set = canonicalCharacterSetInfo(info->value, m_canonicalMode); (ch = *set); ++set) 144 144 addSorted(m_matchesUnicode, ch); 145 145 break; … … 189 189 190 190 private: 191 void addSorted(Vector<UChar>& matches, UChar ch) 191 void addSorted(UChar32 ch) 192 { 193 addSorted(ch <= 0x7f ? 
m_matches : m_matchesUnicode, ch); 194 } 195 196 void addSorted(Vector<UChar32>& matches, UChar32 ch) 192 197 { 193 198 unsigned pos = 0; … … 215 220 } 216 221 217 void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UCharhi)222 void addSortedRange(Vector<CharacterRange>& ranges, UChar32 lo, UChar32 hi) 218 223 { 219 224 unsigned end = ranges.size(); … … 261 266 262 267 bool m_isCaseInsensitive; 263 264 Vector<UChar> m_matches; 268 CanonicalMode m_canonicalMode; 269 270 Vector<UChar32> m_matches; 265 271 Vector<CharacterRange> m_ranges; 266 Vector<UChar > m_matchesUnicode;272 Vector<UChar32> m_matchesUnicode; 267 273 Vector<CharacterRange> m_rangesUnicode; 268 274 }; … … 272 278 YarrPatternConstructor(YarrPattern& pattern) 273 279 : m_pattern(pattern) 274 , m_characterClassConstructor(pattern.m_ignoreCase )280 , m_characterClassConstructor(pattern.m_ignoreCase, pattern.m_unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2) 275 281 , m_invertParentheticalAssertion(false) 276 282 { … … 314 320 } 315 321 316 void atomPatternCharacter(UChar ch)322 void atomPatternCharacter(UChar32 ch) 317 323 { 318 324 // We handle case-insensitive checking of unicode characters which do have both 319 325 // cases by handling them as if they were defined using a CharacterClass. 320 if (!m_pattern.m_ignoreCase || isASCII(ch)) {326 if (!m_pattern.m_ignoreCase || (isASCII(ch) && !m_pattern.m_unicode)) { 321 327 m_alternative->m_terms.append(PatternTerm(ch)); 322 328 return; 323 329 } 324 330 325 const UCS2CanonicalizationRange* info = rangeInfoFor(ch);331 const CanonicalizationRange* info = canonicalRangeInfoFor(ch, m_pattern.m_unicode ? 
CanonicalMode::Unicode : CanonicalMode::UCS2); 326 332 if (info->type == CanonicalizeUnique) { 327 333 m_alternative->m_terms.append(PatternTerm(ch)); … … 358 364 } 359 365 360 void atomCharacterClassAtom(UChar ch)366 void atomCharacterClassAtom(UChar32 ch) 361 367 { 362 368 m_characterClassConstructor.putChar(ch); 363 369 } 364 370 365 void atomCharacterClassRange(UChar begin, UCharend)371 void atomCharacterClassRange(UChar32 begin, UChar32 end) 366 372 { 367 373 m_characterClassConstructor.putRange(begin, end); … … 597 603 currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter; 598 604 alternative->m_hasFixedSize = false; 605 } else if (m_pattern.m_unicode) { 606 currentInputPosition += (!U_IS_BMP(term.patternCharacter) ? 2 : 1) * term.quantityCount; 599 607 } else 600 608 currentInputPosition += term.quantityCount; … … 606 614 term.frameLocation = currentCallFrameSize; 607 615 currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass; 616 alternative->m_hasFixedSize = false; 617 } else if (m_pattern.m_unicode) { 618 term.frameLocation = currentCallFrameSize; 619 currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass; 620 currentInputPosition += term.quantityCount; 608 621 alternative->m_hasFixedSize = false; 609 622 } else … … 833 846 YarrPatternConstructor constructor(*this); 834 847 835 if (const char* error = parse(constructor, patternString ))848 if (const char* error = parse(constructor, patternString, m_unicode)) 836 849 return error; 837 850 … … 847 860 const char* error = 848 861 #endif 849 parse(constructor, patternString, numSubpatterns);862 parse(constructor, patternString, m_unicode, numSubpatterns); 850 863 851 864 ASSERT(!error); … … 862 875 } 863 876 864 YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, const char** error)877 YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, bool unicode, const char** error) 865 878 : m_ignoreCase(ignoreCase) 866 
879 , m_multiline(multiline) 880 , m_unicode(unicode) 867 881 , m_containsBackreferences(false) 868 882 , m_containsBOL(false)
Note: See TracChangeset for help on using the changeset viewer.