Changeset 4482 in webkit for trunk/JavaScriptCore/kjs/regexp.cpp
- Timestamp:
- Jun 4, 2003, 5:11:22 PM (22 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/kjs/regexp.cpp
r4206 r4482 26 26 #include <string.h> 27 27 28 using namespace KJS; 29 30 RegExp::RegExp(const UString &p, int f) 31 : pattern(p), flgs(f) 32 { 28 using KJS::CString; 29 using KJS::RegExp; 30 using KJS::UString; 31 33 32 #ifdef HAVE_PCREPOSIX 34 int pcreflags = 0; 35 const char *perrormsg; 33 34 static CString convertToUTF8(const UString &s) 35 { 36 // Allocate a buffer big enough to hold all the characters. 37 const int length = s.size(); 38 const unsigned bufferSize = length * 3 + 1; 39 char fixedSizeBuffer[1024]; 40 char *buffer; 41 if (bufferSize > sizeof(fixedSizeBuffer)) { 42 buffer = new char [bufferSize]; 43 } else { 44 buffer = fixedSizeBuffer; 45 } 46 47 // Convert to runs of 8-bit characters. 48 char *p = buffer; 49 for (int i = 0; i != length; ++i) { 50 unsigned short c = s[i].unicode(); 51 if (c < 0x80) { 52 *p++ = (char)c; 53 } else if (c < 0x800) { 54 *p++ = (char)((c >> 6) | 0xC0); // C0 is the 2-byte flag for UTF-8 55 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set 56 } else { 57 *p++ = (char)((c >> 12) | 0xE0); // E0 is the 3-byte flag for UTF-8 58 *p++ = (char)(((c >> 6) | 0x80) & 0xBF); // next 6 bits, with high bit set 59 *p++ = (char)((c | 0x80) & 0xBF); // next 6 bits, with high bit set 60 } 61 } 62 *p = 0; 63 64 // Return the result as a C string. 
65 CString result(buffer); 66 if (buffer != fixedSizeBuffer) { 67 delete [] buffer; 68 } 69 return result; 70 } 71 72 struct StringOffset { 73 int offset; 74 int locationInOffsetsArray; 75 }; 76 77 static int compareStringOffsets(const void *a, const void *b) 78 { 79 const StringOffset *oa = static_cast<const StringOffset *>(a); 80 const StringOffset *ob = static_cast<const StringOffset *>(b); 81 82 if (oa->offset < ob->offset) { 83 return -1; 84 } 85 if (oa->offset > ob->offset) { 86 return +1; 87 } 88 return 0; 89 } 90 91 const int sortedOffsetsFixedBufferSize = 128; 92 93 static StringOffset *createSortedOffsetsArray(const int offsets[], int numOffsets, 94 StringOffset sortedOffsetsFixedBuffer[sortedOffsetsFixedBufferSize]) 95 { 96 // Allocate the sorted offsets. 97 StringOffset *sortedOffsets; 98 if (numOffsets <= sortedOffsetsFixedBufferSize) { 99 sortedOffsets = sortedOffsetsFixedBuffer; 100 } else { 101 sortedOffsets = new StringOffset [numOffsets]; 102 } 103 104 // Copy offsets. 105 for (int i = 0; i != numOffsets; ++i) { 106 sortedOffsets[i].offset = offsets[i]; 107 sortedOffsets[i].locationInOffsetsArray = i; 108 } 109 110 // Sort them. 111 qsort(sortedOffsets, numOffsets, sizeof(StringOffset), compareStringOffsets); 112 113 return sortedOffsets; 114 } 115 116 static void convertCharacterOffsetsToUTF8ByteOffsets(const char *s, int *offsets, int numOffsets) 117 { 118 // Allocate buffer. 119 StringOffset fixedBuffer[sortedOffsetsFixedBufferSize]; 120 StringOffset *sortedOffsets = createSortedOffsetsArray(offsets, numOffsets, fixedBuffer); 121 122 // Walk through sorted offsets and string, adjusting all the offsets. 123 // Offsets that are off the ends of the string map to the edges of the string. 124 int characterOffset = 0; 125 const char *p = s; 126 for (int oi = 0; oi != numOffsets; ++oi) { 127 const int nextOffset = sortedOffsets[oi].offset; 128 while (*p && characterOffset < nextOffset) { 129 // Skip to the next character. 
130 ++characterOffset; 131 do ++p; while ((*p & 0xC0) == 0x80); // if 1 of the 2 high bits is set, it's not the start of a character 132 } 133 offsets[sortedOffsets[oi].locationInOffsetsArray] = p - s; 134 } 135 136 // Free buffer. 137 if (sortedOffsets != fixedBuffer) { 138 delete [] sortedOffsets; 139 } 140 } 141 142 static void convertUTF8ByteOffsetsToCharacterOffsets(const char *s, int *offsets, int numOffsets) 143 { 144 // Allocate buffer. 145 StringOffset fixedBuffer[sortedOffsetsFixedBufferSize]; 146 StringOffset *sortedOffsets = createSortedOffsetsArray(offsets, numOffsets, fixedBuffer); 147 148 // Walk through sorted offsets and string, adjusting all the offsets. 149 // Offsets that are off the end of the string map to the edges of the string. 150 int characterOffset = 0; 151 const char *p = s; 152 for (int oi = 0; oi != numOffsets; ++oi) { 153 const int nextOffset = sortedOffsets[oi].offset; 154 while (*p && (p - s) < nextOffset) { 155 // Skip to the next character. 156 ++characterOffset; 157 do ++p; while ((*p & 0xC0) == 0x80); // if 1 of the 2 high bits is set, it's not the start of a character 158 } 159 offsets[sortedOffsets[oi].locationInOffsetsArray] = characterOffset; 160 } 161 162 // Free buffer. 163 if (sortedOffsets != fixedBuffer) { 164 delete [] sortedOffsets; 165 } 166 } 167 168 #endif // HAVE_PCREPOSIX 169 170 RegExp::RegExp(const UString &p, int flags) 171 : _flags(flags), _numSubPatterns(0) 172 { 173 #ifdef HAVE_PCREPOSIX 174 175 int options = PCRE_UTF8; 176 // Note: the Global flag is already handled by RegExpProtoFunc::execute. 
177 if (flags & IgnoreCase) 178 options |= PCRE_CASELESS; 179 if (flags & Multiline) 180 options |= PCRE_MULTILINE; 181 182 const char *errorMessage; 36 183 int errorOffset; 37 38 if (flgs & IgnoreCase) 39 pcreflags |= PCRE_CASELESS; 40 41 if (flgs & Multiline) 42 pcreflags |= PCRE_MULTILINE; 43 44 pcregex = pcre_compile(p.ascii(), pcreflags, 45 &perrormsg, &errorOffset, NULL); 184 _regex = pcre_compile(convertToUTF8(p).c_str(), options, &errorMessage, &errorOffset, NULL); 185 if (!_regex) { 46 186 #ifndef NDEBUG 47 if (!pcregex) 48 fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", perrormsg); 49 #endif 187 fprintf(stderr, "KJS: pcre_compile() failed with '%s'\n", errorMessage); 188 #endif 189 return; 190 } 50 191 51 192 #ifdef PCRE_INFO_CAPTURECOUNT 52 // Get number of subpatterns that will be returned 53 int rc = pcre_fullinfo( pcregex, NULL, PCRE_INFO_CAPTURECOUNT, &nrSubPatterns); 54 if (rc != 0) 55 #endif 56 nrSubPatterns = 0; // fallback. We always need the first pair of offsets. 193 // Get number of subpatterns that will be returned. 194 pcre_fullinfo(_regex, NULL, PCRE_INFO_CAPTURECOUNT, &_numSubPatterns); 195 #endif 57 196 58 197 #else /* HAVE_PCREPOSIX */ 59 198 60 nrSubPatterns = 0; // determined in match() with POSIX regex.61 199 int regflags = 0; 62 200 #ifdef REG_EXTENDED … … 73 211 // Note: the Global flag is already handled by RegExpProtoFunc::execute 74 212 75 regcomp(& preg, p.ascii(), regflags);213 regcomp(&_regex, p.ascii(), regflags); 76 214 /* TODO check for errors */ 77 #endif 78 215 216 #endif 79 217 } 80 218 … … 82 220 { 83 221 #ifdef HAVE_PCREPOSIX 84 if (pcregex) 85 pcre_free(pcregex); 222 pcre_free(_regex); 86 223 #else 87 224 /* TODO: is this really okay after an error ? 
*/ 88 regfree(& preg);225 regfree(&_regex); 89 226 #endif 90 227 } … … 94 231 if (i < 0) 95 232 i = 0; 96 if (ovector)97 *ovector = 0L;98 233 int dummyPos; 99 234 if (!pos) 100 235 pos = &dummyPos; 101 236 *pos = -1; 237 if (ovector) 238 *ovector = 0; 239 102 240 if (i > s.size() || s.isNull()) 103 241 return UString::null(); 104 242 105 243 #ifdef HAVE_PCREPOSIX 106 CString buffer(s.cstring()); 107 int ovecsize = (nrSubPatterns+1)*3; // see pcre docu 108 if (ovector) *ovector = new int[ovecsize]; 109 110 if (!pcregex || pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), i, 111 0, ovector ? *ovector : 0L, ovecsize) == PCRE_ERROR_NOMATCH) 244 245 if (!_regex) 112 246 return UString::null(); 113 247 114 if (!ovector) 115 return UString::null(); // don't rely on the return value if you pass ovector==0 248 // Set up the offset vector for the result. 249 // First 2/3 used for result, the last third used by PCRE. 250 int *offsetVector; 251 int offsetVectorSize; 252 int fixedSizeOffsetVector[3]; 253 if (!ovector) { 254 offsetVectorSize = 3; 255 offsetVector = fixedSizeOffsetVector; 256 } else { 257 offsetVectorSize = (_numSubPatterns + 1) * 3; 258 offsetVector = new int [offsetVectorSize]; 259 } 260 261 const CString buffer(convertToUTF8(s)); 262 convertCharacterOffsetsToUTF8ByteOffsets(buffer.c_str(), &i, 1); 263 const int numMatches = pcre_exec(_regex, NULL, buffer.c_str(), buffer.size(), i, 0, offsetVector, offsetVectorSize); 264 265 if (numMatches < 0) { 266 #ifndef NDEBUG 267 if (numMatches != PCRE_ERROR_NOMATCH) 268 fprintf(stderr, "KJS: pcre_exec() failed with result %d\n", numMatches); 269 #endif 270 if (offsetVector != fixedSizeOffsetVector) 271 delete [] offsetVector; 272 return UString::null(); 273 } 274 275 convertUTF8ByteOffsetsToCharacterOffsets(buffer.c_str(), offsetVector, (numMatches == 0 ? 
1 : numMatches) * 2); 276 277 *pos = offsetVector[0]; 278 if (ovector) 279 *ovector = offsetVector; 280 return s.substr(offsetVector[0], offsetVector[1] - offsetVector[0]); 281 116 282 #else 283 117 284 const uint maxMatch = 10; 118 285 regmatch_t rmatch[maxMatch]; 119 286 120 287 char *str = strdup(s.ascii()); // TODO: why ??? 121 if (regexec(& preg, str + i, maxMatch, rmatch, 0)) {288 if (regexec(&_regex, str + i, maxMatch, rmatch, 0)) { 122 289 free(str); 123 290 return UString::null(); … … 131 298 132 299 // map rmatch array to ovector used in PCRE case 133 nrSubPatterns = 0;300 _numSubPatterns = 0; 134 301 for(uint j = 1; j < maxMatch && rmatch[j].rm_so >= 0 ; j++) 135 nrSubPatterns++;136 int ovecsize = ( nrSubPatterns+1)*3; // see above302 _numSubPatterns++; 303 int ovecsize = (_numSubPatterns+1)*3; // see above 137 304 *ovector = new int[ovecsize]; 138 for (uint j = 0; j < nrSubPatterns + 1; j++) {305 for (uint j = 0; j < _numSubPatterns + 1; j++) { 139 306 if (j>maxMatch) 140 307 break; … … 142 309 (*ovector)[2*j+1] = rmatch[j].rm_eo + i; 143 310 } 144 #endif145 311 146 312 *pos = (*ovector)[0]; 147 313 return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]); 148 } 149 150 #if 0 // unused 151 bool RegExp::test(const UString &s, int) 152 { 153 #ifdef HAVE_PCREPOSIX 154 int ovector[300]; 155 CString buffer(s.cstring()); 156 157 if (s.isNull() || 158 pcre_exec(pcregex, NULL, buffer.c_str(), buffer.size(), 0, 159 0, ovector, 300) == PCRE_ERROR_NOMATCH) 160 return false; 161 else 162 return true; 163 164 #else 165 166 char *str = strdup(s.ascii()); 167 int r = regexec(&preg, str, 0, 0, 0); 168 free(str); 169 170 return r == 0; 171 #endif 172 } 173 #endif 314 315 #endif 316 }
Note:
See TracChangeset
for help on using the changeset viewer.