Changeset 87109 in webkit for trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp
- Timestamp:
- May 23, 2011, 5:09:19 PM (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/yarr/YarrPattern.cpp
r86547 r87109 235 235 }; 236 236 237 struct BeginCharHelper {238 BeginCharHelper(Vector<BeginChar>* beginChars, bool isCaseInsensitive = false)239 : m_beginChars(beginChars)240 , m_isCaseInsensitive(isCaseInsensitive)241 {}242 243 void addBeginChar(BeginChar beginChar, Vector<TermChain>* hotTerms, QuantifierType quantityType, unsigned quantityCount)244 {245 if (quantityType == QuantifierFixedCount && quantityCount > 1) {246 // We duplicate the first found character if the quantity of the term is more than one. eg.: /a{3}/247 beginChar.value |= beginChar.value << 16;248 beginChar.mask |= beginChar.mask << 16;249 addCharacter(beginChar);250 } else if (quantityType == QuantifierFixedCount && quantityCount == 1 && hotTerms->size())251 // In case of characters with fixed quantifier we should check the next character as well.252 linkHotTerms(beginChar, hotTerms);253 else254 // In case of greedy matching the next character checking is unnecessary therefore we just store255 // the first character.256 addCharacter(beginChar);257 }258 259 // Merge two following BeginChars in the vector to reduce the number of character checks.260 void merge(unsigned size)261 {262 for (unsigned i = 0; i < size; i++) {263 BeginChar* curr = &m_beginChars->at(i);264 BeginChar* next = &m_beginChars->at(i + 1);265 266 // If the current and the next size of value is different we should skip the merge process267 // because the 16bit and 32bit values are unmergable.268 if (curr->value <= 0xFFFF && next->value > 0xFFFF)269 continue;270 271 unsigned diff = curr->value ^ next->value;272 273 curr->mask |= diff;274 curr->value |= curr->mask;275 276 m_beginChars->remove(i + 1);277 size--;278 }279 }280 281 private:282 void addCharacter(BeginChar beginChar)283 {284 unsigned pos = 0;285 unsigned range = m_beginChars->size();286 287 // binary chop, find position to insert char.288 while (range) {289 unsigned index = range >> 1;290 291 int val = m_beginChars->at(pos+index).value - beginChar.value;292 if (!val)293 return;294 if (val < 0)295 range = index;296 else {297 pos += (index+1);298 range -= (index+1);299 }300 }301 302 if (pos == m_beginChars->size())303 m_beginChars->append(beginChar);304 else305 m_beginChars->insert(pos, beginChar);306 }307 308 // Create BeginChar objects by appending each terms from a hotTerms vector to an existing BeginChar object.309 void linkHotTerms(BeginChar beginChar, Vector<TermChain>* hotTerms)310 {311 for (unsigned i = 0; i < hotTerms->size(); i++) {312 PatternTerm hotTerm = hotTerms->at(i).term;313 ASSERT(hotTerm.type == PatternTerm::TypePatternCharacter);314 315 UChar characterNext = hotTerm.patternCharacter;316 317 // Append a character to an existing BeginChar object.318 if (characterNext <= 0x7f) {319 unsigned mask = 0;320 321 if (m_isCaseInsensitive && isASCIIAlpha(characterNext)) {322 mask = 32;323 characterNext = toASCIILower(characterNext);324 }325 326 addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask | (mask << 16)));327 } else {328 UChar upper, lower;329 if (m_isCaseInsensitive && ((upper = Unicode::toUpper(characterNext)) != (lower = Unicode::toLower(characterNext)))) {330 addCharacter(BeginChar(beginChar.value | (upper << 16), beginChar.mask));331 addCharacter(BeginChar(beginChar.value | (lower << 16), beginChar.mask));332 } else333 addCharacter(BeginChar(beginChar.value | (characterNext << 16), beginChar.mask));334 }335 }336 }337 338 Vector<BeginChar>* m_beginChars;339 bool m_isCaseInsensitive;340 };341 342 237 class YarrPatternConstructor { 343 238 public: … … 345 240 : m_pattern(pattern) 346 241 , m_characterClassConstructor(pattern.m_ignoreCase) 347 , m_beginCharHelper(&pattern.m_beginChars, pattern.m_ignoreCase)348 242 , m_invertParentheticalAssertion(false) 349 243 { … … 782 676 } 783 677 784 // This function collects the terms which are potentially matching the first number of depth characters in the result.785 // If this function returns false then it found at least one term which makes the beginning character786 // look-up optimization inefficient.787 bool setupDisjunctionBeginTerms(PatternDisjunction* disjunction, Vector<TermChain>* beginTerms, unsigned depth)788 {789 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {790 PatternAlternative* alternative = disjunction->m_alternatives[alt];791 792 if (!setupAlternativeBeginTerms(alternative, beginTerms, 0, depth))793 return false;794 }795 796 return true;797 }798 799 bool setupAlternativeBeginTerms(PatternAlternative* alternative, Vector<TermChain>* beginTerms, unsigned termIndex, unsigned depth)800 {801 bool checkNext = true;802 unsigned numTerms = alternative->m_terms.size();803 804 while (checkNext && termIndex < numTerms) {805 PatternTerm term = alternative->m_terms[termIndex];806 checkNext = false;807 808 switch (term.type) {809 case PatternTerm::TypeAssertionBOL:810 case PatternTerm::TypeAssertionEOL:811 case PatternTerm::TypeAssertionWordBoundary:812 return false;813 814 case PatternTerm::TypeBackReference:815 case PatternTerm::TypeForwardReference:816 return false;817 818 case PatternTerm::TypePatternCharacter:819 if (termIndex != numTerms - 1) {820 beginTerms->append(TermChain(term));821 termIndex++;822 checkNext = true;823 } else if (term.quantityType == QuantifierFixedCount) {824 beginTerms->append(TermChain(term));825 if (depth < 2 && termIndex < numTerms - 1 && term.quantityCount == 1)826 if (!setupAlternativeBeginTerms(alternative, &beginTerms->last().hotTerms, termIndex + 1, depth + 1))827 return false;828 }829 830 break;831 832 case PatternTerm::TypeCharacterClass:833 return false;834 835 case PatternTerm::TypeParentheticalAssertion:836 if (term.invert())837 return false;838 839 case PatternTerm::TypeParenthesesSubpattern:840 if (term.quantityType != QuantifierFixedCount) {841 if (termIndex == numTerms - 1)842 break;843 844 termIndex++;845 checkNext = true;846 }847 848 if (!setupDisjunctionBeginTerms(term.parentheses.disjunction, beginTerms, depth))849 return false;850 851 break;852 }853 }854 855 return true;856 }857 858 void setupBeginChars()859 {860 Vector<TermChain> beginTerms;861 bool containsFixedCharacter = false;862 863 if ((!m_pattern.m_body->m_hasFixedSize || m_pattern.m_body->m_alternatives.size() > 1)864 && setupDisjunctionBeginTerms(m_pattern.m_body, &beginTerms, 0)) {865 unsigned size = beginTerms.size();866 867 // If we haven't collected any terms we should abort the preparation of beginning character look-up optimization.868 if (!size)869 return;870 871 m_pattern.m_containsBeginChars = true;872 873 for (unsigned i = 0; i < size; i++) {874 PatternTerm term = beginTerms[i].term;875 876 // We have just collected PatternCharacter terms, other terms are not allowed.877 ASSERT(term.type == PatternTerm::TypePatternCharacter);878 879 if (term.quantityType == QuantifierFixedCount)880 containsFixedCharacter = true;881 882 UChar character = term.patternCharacter;883 unsigned mask = 0;884 885 if (character <= 0x7f) {886 if (m_pattern.m_ignoreCase && isASCIIAlpha(character)) {887 mask = 32;888 character = toASCIILower(character);889 }890 891 m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);892 } else {893 UChar upper, lower;894 if (m_pattern.m_ignoreCase && ((upper = Unicode::toUpper(character)) != (lower = Unicode::toLower(character)))) {895 m_beginCharHelper.addBeginChar(BeginChar(upper, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);896 m_beginCharHelper.addBeginChar(BeginChar(lower, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);897 } else898 m_beginCharHelper.addBeginChar(BeginChar(character, mask), &beginTerms[i].hotTerms, term.quantityType, term.quantityCount);899 }900 }901 902 // If the pattern doesn't contain terms with fixed quantifiers then the beginning character look-up optimization is inefficient.903 if (!containsFixedCharacter) {904 m_pattern.m_containsBeginChars = false;905 return;906 }907 908 size = m_pattern.m_beginChars.size();909 910 if (size > 2)911 m_beginCharHelper.merge(size - 1);912 else if (size <= 1)913 m_pattern.m_containsBeginChars = false;914 }915 }916 917 678 private: 918 679 YarrPattern& m_pattern; 919 680 PatternAlternative* m_alternative; 920 681 CharacterClassConstructor m_characterClassConstructor; 921 BeginCharHelper m_beginCharHelper;922 682 bool m_invertCharacterClass; 923 683 bool m_invertParentheticalAssertion; … … 952 712 953 713 constructor.setupOffsets(); 954 constructor.setupBeginChars();955 714 956 715 return 0; … … 961 720 , m_multiline(multiline) 962 721 , m_containsBackreferences(false) 963 , m_containsBeginChars(false)964 722 , m_containsBOL(false) 965 723 , m_numSubpatterns(0)
Note:
See TracChangeset
for help on using the changeset viewer.