Changeset 27320 in webkit for trunk/JavaScriptCore/kjs/regexp.cpp


Ignore:
Timestamp:
Oct 31, 2007, 7:46:41 AM (18 years ago)
Author:
darin
Message:

Reviewed by Maciej.

Speeds things up 0.4% according to SunSpider.

  • kjs/config.h: Define USE(PCRE16) instead of HAVE(PCREPOSIX), because this library doesn't use the real PCRE -- it uses its own PCRE that works on UTF-16.
  • kjs/regexp.h: Removed a few unused functions. Changed the ifdef. Use Noncopyable. Change the return value of match.
  • kjs/regexp.cpp: (KJS::RegExp::RegExp): Call pcre_compile2, for a slight speed boost. (KJS::RegExp::~RegExp): PCRE16 rather than PCREPOSIX. (KJS::RegExp::match): Change to return the position as an int and the ovector as an OwnArrayPtr<int> for efficiency and clearer storage management.
  • kjs/regexp_object.h: Change performMatch and arrayOfMatches to no longer require a result string.
  • kjs/regexp_object.cpp: (RegExpProtoFunc::callAsFunction): Update for new signature of performMatch. (RegExpObjectImp::performMatch): Change so it doesn't return a string. (RegExpObjectImp::arrayOfMatches): Simplify by unifying the handling of the main result with the backreferences; now it doesn't need to take a result parameter. (RegExpObjectImp::getBackref): Minor tweaks. (RegExpObjectImp::getLastParen): Ditto. (RegExpObjectImp::getLeftContext): Ditto. (RegExpObjectImp::getRightContext): Ditto. (RegExpObjectImp::getValueProperty): Change LastMatch case to call getBackref(0) so we don't need a separate getLastMatch function.
  • kjs/string_object.cpp: (KJS::replace): Update to use new performMatch, including merging the matched string section with the other substrings. (KJS::StringProtoFunc::callAsFunction): Update functions to use the new performMatch and match. Also change to use OwnArrayPtr.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/kjs/regexp.cpp

    r26688 r27320  
    3434  : m_flags(flags), m_constructionError(0), m_numSubPatterns(0)
    3535{
    36 #if HAVE(PCREPOSIX)
     36#if USE(PCRE16)
    3737
    3838  int options = PCRE_UTF8;
    39   // Note: the Global flag is already handled by RegExpProtoFunc::execute.
    40   // FIXME: That last comment is dubious. Not all RegExps get run through RegExpProtoFunc::execute.
    4139  if (flags & IgnoreCase)
    4240    options |= PCRE_CASELESS;
     
    4442    options |= PCRE_MULTILINE;
    4543
    46   const char *errorMessage;
     44  const char* errorMessage;
    4745  int errorOffset;
    48  
    49   m_regex = pcre_compile(reinterpret_cast<const uint16_t*>(p.data()), p.size(),
    50                         options, &errorMessage, &errorOffset, NULL);
     46  m_regex = pcre_compile2(reinterpret_cast<const uint16_t*>(p.data()), p.size(),
     47                          options, NULL, &errorMessage, &errorOffset, NULL);
    5148  if (!m_regex) {
    5249    m_constructionError = strdup(errorMessage);
     
    5451  }
    5552
    56 #ifdef PCRE_INFO_CAPTURECOUNT
    5753  // Get number of subpatterns that will be returned.
    5854  pcre_fullinfo(m_regex, NULL, PCRE_INFO_CAPTURECOUNT, &m_numSubPatterns);
    59 #endif
    6055
    61 #else /* HAVE(PCREPOSIX) */
     56#else /* USE(PCRE16) */
    6257
    6358  int regflags = 0;
     
    8984RegExp::~RegExp()
    9085{
    91 #if HAVE(PCREPOSIX)
     86#if USE(PCRE16)
    9287  pcre_free(m_regex);
    9388#else
     
    9893}
    9994
    100 UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
     95int RegExp::match(const UString& s, int i, OwnArrayPtr<int>* ovector)
    10196{
    10297  if (i < 0)
    10398    i = 0;
    104   int dummyPos;
    105   if (!pos)
    106     pos = &dummyPos;
    107   *pos = -1;
    10899  if (ovector)
    109     *ovector = 0;
     100    ovector->clear();
    110101
    111102  if (i > s.size() || s.isNull())
    112     return UString::null();
     103    return -1;
    113104
    114 #if HAVE(PCREPOSIX)
     105#if USE(PCRE16)
    115106
    116107  if (!m_regex)
    117     return UString::null();
     108    return -1;
    118109
    119110  // Set up the offset vector for the result.
    120111  // First 2/3 used for result, the last third used by PCRE.
    121   int *offsetVector;
     112  int* offsetVector;
    122113  int offsetVectorSize;
    123114  int fixedSizeOffsetVector[3];
     
    128119    offsetVectorSize = (m_numSubPatterns + 1) * 3;
    129120    offsetVector = new int [offsetVectorSize];
     121    ovector->set(offsetVector);
    130122  }
    131123
    132   const int numMatches = pcre_exec(m_regex, NULL, reinterpret_cast<const uint16_t *>(s.data()), s.size(), i, 0, offsetVector, offsetVectorSize);
     124  int numMatches = pcre_exec(m_regex, NULL, reinterpret_cast<const uint16_t *>(s.data()), s.size(), i, 0, offsetVector, offsetVectorSize);
    133125
    134126  if (numMatches < 0) {
     
    137129      fprintf(stderr, "KJS: pcre_exec() failed with result %d\n", numMatches);
    138130#endif
    139     if (offsetVector != fixedSizeOffsetVector)
    140       delete [] offsetVector;
    141     return UString::null();
     131    if (ovector)
     132      ovector->clear();
     133    return -1;
    142134  }
    143135
    144   *pos = offsetVector[0];
    145   if (ovector)
    146     *ovector = offsetVector;
    147   return s.substr(offsetVector[0], offsetVector[1] - offsetVector[0]);
     136  return offsetVector[0];
    148137
    149138#else
     
    183172}
    184173
    185 bool RegExp::isHexDigit(UChar uc)
    186 {
    187   int c = uc.unicode();
    188   return (c >= '0' && c <= '9' ||
    189           c >= 'a' && c <= 'f' ||
    190           c >= 'A' && c <= 'F');
    191 }
    192 
    193 unsigned char RegExp::convertHex(int c)
    194 {
    195   if (c >= '0' && c <= '9')
    196     return static_cast<unsigned char>(c - '0');
    197   if (c >= 'a' && c <= 'f')
    198     return static_cast<unsigned char>(c - 'a' + 10);
    199   return static_cast<unsigned char>(c - 'A' + 10);
    200 }
    201 
    202 unsigned char RegExp::convertHex(int c1, int c2)
    203 {
    204   return ((convertHex(c1) << 4) + convertHex(c2));
    205 }
    206 
    207 UChar RegExp::convertUnicode(UChar uc1, UChar uc2, UChar uc3, UChar uc4)
    208 {
    209   int c1 = uc1.unicode();
    210   int c2 = uc2.unicode();
    211   int c3 = uc3.unicode();
    212   int c4 = uc4.unicode();
    213   return UChar((convertHex(c1) << 4) + convertHex(c2),
    214                (convertHex(c3) << 4) + convertHex(c4));
    215 }
    216 
    217174} // namespace KJS
Note: See TracChangeset for help on using the changeset viewer.