Changeset 72813 in webkit for trunk/JavaScriptCore/yarr/RegexParser.h
- Timestamp: Nov 29, 2010, 10:52:16 AM (15 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/yarr/RegexParser.h
r72489 r72813 57 57 ParenthesesTypeInvalid, 58 58 CharacterClassUnmatched, 59 CharacterClassInvalidRange, 59 60 CharacterClassOutOfOrder, 60 61 EscapeUnterminated, … … 76 77 : m_delegate(delegate) 77 78 , m_err(err) 78 , m_state( empty)79 , m_state(Empty) 79 80 { 80 81 } … … 91 92 92 93 /* 93 * atomPatternCharacter Unescaped():94 * atomPatternCharacter(): 94 95 * 95 * This method is called directly from parseCharacterClass(), to report a new96 * pattern character token. This method differs from atomPatternCharacter(),97 * which will be called from parseEscape(), since a hypen provided viathis98 * m ethod may be indicating a character range, but a hyphen parsed by99 * parseEscape() cannot be interpreted as doing so.96 * This method is called either from parseCharacterClass() (for an unescaped 97 * character in a character class), or from parseEscape(). In the former case 98 * the value true will be passed for the argument 'hyphenIsRange', and in this 99 * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/ 100 * is different to /[a\-z]/). 100 101 */ 101 void atomPatternCharacter Unescaped(UChar ch)102 void atomPatternCharacter(UChar ch, bool hyphenIsRange = false) 102 103 { 103 104 switch (m_state) { 104 case empty: 105 case AfterCharacterClass: 106 // Following a builtin character class we need look out for a hyphen. 107 // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/. 108 // If we see a hyphen following a charater class then unlike usual 109 // we'll report it to the delegate immediately, and put ourself into 110 // a poisoned state. Any following calls to add another character or 111 // character class will result in an error. (A hypen following a 112 // character-class is itself valid, but only at the end of a regex). 
113 if (hyphenIsRange && ch == '-') { 114 m_delegate.atomCharacterClassAtom('-'); 115 m_state = AfterCharacterClassHyphen; 116 return; 117 } 118 // Otherwise just fall through - cached character so treat this as Empty. 119 120 case Empty: 105 121 m_character = ch; 106 m_state = cachedCharacter;107 break;108 109 case cachedCharacter:110 if ( ch == '-')111 m_state = cachedCharacterHyphen;122 m_state = CachedCharacter; 123 return; 124 125 case CachedCharacter: 126 if (hyphenIsRange && ch == '-') 127 m_state = CachedCharacterHyphen; 112 128 else { 113 129 m_delegate.atomCharacterClassAtom(m_character); 114 130 m_character = ch; 115 131 } 116 break; 117 118 case cachedCharacterHyphen: 119 if (ch >= m_character) 120 m_delegate.atomCharacterClassRange(m_character, ch); 121 else 132 return; 133 134 case CachedCharacterHyphen: 135 if (ch < m_character) { 122 136 m_err = CharacterClassOutOfOrder; 123 m_state = empty; 124 } 125 } 126 127 /* 128 * atomPatternCharacter(): 129 * 130 * Adds a pattern character, called by parseEscape(), as such will not 131 * interpret a hyphen as indicating a character range. 132 */ 133 void atomPatternCharacter(UChar ch) 134 { 135 // Flush if a character is already pending to prevent the 136 // hyphen from begin interpreted as indicating a range. 137 if((ch == '-') && (m_state == cachedCharacter)) 138 flush(); 139 140 atomPatternCharacterUnescaped(ch); 137 return; 138 } 139 m_delegate.atomCharacterClassRange(m_character, ch); 140 m_state = Empty; 141 return; 142 143 case AfterCharacterClassHyphen: 144 // Error! We have something like /[\d-x]/. 145 m_err = CharacterClassInvalidRange; 146 return; 147 } 141 148 } 142 149 … … 148 155 void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert) 149 156 { 150 flush(); 151 m_delegate.atomCharacterClassBuiltIn(classID, invert); 157 switch (m_state) { 158 case CachedCharacter: 159 // Flush the currently cached character, then fall through. 
160 m_delegate.atomCharacterClassAtom(m_character); 161 162 case Empty: 163 case AfterCharacterClass: 164 m_state = AfterCharacterClass; 165 m_delegate.atomCharacterClassBuiltIn(classID, invert); 166 return; 167 168 case CachedCharacterHyphen: 169 case AfterCharacterClassHyphen: 170 // Error! If we hit either of these cases, we have an 171 // invalid range that looks something like /[x-\d]/ 172 // or /[\d-\d]/. 173 m_err = CharacterClassInvalidRange; 174 return; 175 } 152 176 } 153 177 … … 159 183 void end() 160 184 { 161 flush(); 185 if (m_state == CachedCharacter) 186 m_delegate.atomCharacterClassAtom(m_character); 187 else if (m_state == CachedCharacterHyphen) { 188 m_delegate.atomCharacterClassAtom(m_character); 189 m_delegate.atomCharacterClassAtom('-'); 190 } 162 191 m_delegate.atomCharacterClassEnd(); 163 192 } … … 169 198 170 199 private: 171 void flush()172 {173 if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen174 m_delegate.atomCharacterClassAtom(m_character);175 if (m_state == cachedCharacterHyphen)176 m_delegate.atomCharacterClassAtom('-');177 m_state = empty;178 }179 180 200 Delegate& m_delegate; 181 201 ErrorCode& m_err; 182 202 enum CharacterClassConstructionState { 183 empty, 184 cachedCharacter, 185 cachedCharacterHyphen, 203 Empty, 204 CachedCharacter, 205 CachedCharacterHyphen, 206 AfterCharacterClass, 207 AfterCharacterClassHyphen, 186 208 } m_state; 187 209 UChar m_character; … … 429 451 430 452 default: 431 characterClassConstructor.atomPatternCharacter Unescaped(consume());453 characterClassConstructor.atomPatternCharacter(consume(), true); 432 454 } 433 455 … … 658 680 "unrecognized character after (?", 659 681 "missing terminating ] for character class", 682 "invalid range in character class", 660 683 "range out of order in character class", 661 684 "\\ at end of pattern"
Note: See TracChangeset for help on using the changeset viewer.