Changeset 17862 in webkit for trunk/JavaScriptCore/kjs/regexp.cpp
- Timestamp:
- Nov 20, 2006, 12:24:22 PM (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/kjs/regexp.cpp
r13203 r17862 45 45 const char *errorMessage; 46 46 int errorOffset; 47 UString nullTerminated(p); 48 char null(0); 49 nullTerminated.append(null); 50 _regex = pcre_compile(reinterpret_cast<const uint16_t *>(nullTerminated.data()), options, &errorMessage, &errorOffset, NULL); 51 if (!_regex) 52 return; 47 48 UString pattern(p); 49 50 pattern.append('\0'); 51 _regex = pcre_compile(reinterpret_cast<const uint16_t*>(pattern.data()), 52 options, &errorMessage, &errorOffset, NULL); 53 if (!_regex) { 54 // Try again, this time handle any \u we might find. 55 UString uPattern = sanitizePattern(pattern); 56 _regex = pcre_compile(reinterpret_cast<const uint16_t*>(uPattern.data()), 57 options, &errorMessage, &errorOffset, NULL); 58 if (!_regex) 59 return; 60 } 53 61 54 62 #ifdef PCRE_INFO_CAPTURECOUNT … … 174 182 } 175 183

// Returns a copy of |p| in which every \uXXXX escape (exactly four hex digits)
// has been replaced by the UTF-16 code unit it denotes, so that the result can
// be handed to a regex compiler that does not understand \u itself.
//
// A decoded character that would otherwise act as a pattern metacharacter is
// emitted with a leading backslash so that it keeps its literal meaning.
// A "\u" that is not followed by four hex digits, or whose backslash is itself
// escaped by a preceding backslash ("\\u0041"), is copied through unchanged.
UString RegExp::sanitizePattern(const UString& p)
{
    UString newPattern;

    int startPos = 0;                  // First character of p not yet copied to newPattern.
    int escapePos = p.find("\\u", 0);  // Index of the backslash; -1 when there is no further match.

    while (escapePos != -1) {
        const int digitsPos = escapePos + 2; // Skip the \u

        // An odd number of backslashes directly in front of this one means the
        // backslash we found is escaped, i.e. the text is a literal '\'
        // followed by a plain 'u', not a unicode escape.
        int precedingBackslashes = 0;
        for (int i = escapePos - 1; i >= 0 && p[i].unicode() == '\\'; --i)
            ++precedingBackslashes;
        const bool backslashIsEscaped = (precedingBackslashes % 2) != 0;

        if (!backslashIsEscaped && digitsPos + 3 < p.size()
            && isHexDigit(p[digitsPos]) && isHexDigit(p[digitsPos + 1])
            && isHexDigit(p[digitsPos + 2]) && isHexDigit(p[digitsPos + 3])) {
            // Copy everything between the previous escape and this one.
            newPattern.append(p.substr(startPos, escapePos - startPos));

            UChar escapedUnicode(convertUnicode(p[digitsPos], p[digitsPos + 1],
                                                p[digitsPos + 2], p[digitsPos + 3]));

            // \u encoded characters should be treated as if they were escaped,
            // so add an escape for the characters that would otherwise be
            // interpreted as regular expression syntax. Only non-alphanumeric
            // characters are listed: a backslash in front of those always
            // yields the literal character.
            switch (escapedUnicode.unicode()) {
            case '|':
            case '+':
            case '*':
            case '(':
            case ')':
            case '[':
            case ']':
            case '{':
            case '}':
            case '?':
            case '\\':
            case '.':
            case '^':
            case '$':
            case '-':
                newPattern.append('\\');
                break;
            default:
                break;
            }
            newPattern.append(escapedUnicode);

            startPos = digitsPos + 4;
        }

        // Continue searching after the "\u" that was just examined.
        escapePos = p.find("\\u", digitsPos);
    }

    // Copy whatever follows the last escape (or the whole pattern if none matched).
    newPattern.append(p.substr(startPos, p.size() - startPos));

    return newPattern;
}

// Returns true if |uc| is an ASCII hexadecimal digit (0-9, a-f, A-F).
bool RegExp::isHexDigit(UChar uc)
{
    int c = uc.unicode();
    return (c >= '0' && c <= '9')
        || (c >= 'a' && c <= 'f')
        || (c >= 'A' && c <= 'F');
}

// Converts a single ASCII hex digit to its numeric value (0-15).
// The caller must have checked the input with isHexDigit(); any character that
// is not 0-9 or a-f is treated as if it were in the range A-F.
unsigned char RegExp::convertHex(int c)
{
    if (c >= '0' && c <= '9')
        return static_cast<unsigned char>(c - '0');
    if (c >= 'a' && c <= 'f')
        return static_cast<unsigned char>(c - 'a' + 10);
    return static_cast<unsigned char>(c - 'A' + 10);
}

// Combines two ASCII hex digits into one byte; |c1| is the high nibble.
unsigned char RegExp::convertHex(int c1, int c2)
{
    return static_cast<unsigned char>((convertHex(c1) << 4) + convertHex(c2));
}

// Builds a UChar from four ASCII hex digits: |uc1| and |uc2| form the first
// constructor argument, |uc3| and |uc4| the second.
UChar RegExp::convertUnicode(UChar uc1, UChar uc2, UChar uc3, UChar uc4)
{
    return UChar(convertHex(uc1.unicode(), uc2.unicode()),
                 convertHex(uc3.unicode(), uc4.unicode()));
}

} // namespace KJS
Note:
See TracChangeset
for help on using the changeset viewer.