Changeset 27571 in webkit for trunk/JavaScriptCore/kjs/regexp.cpp


Ignore:
Timestamp:
Nov 7, 2007, 9:18:39 AM (18 years ago)
Author:
[email protected]
Message:

JavaScriptCore:

Reviewed by Darin Adler.


Fixed part of http://bugs.webkit.org/show_bug.cgi?id=15861
15% of string-validate-input.js is spent compiling the same regular expression.

Put RegExpImp properties into a static hashtable to avoid a slew of
PropertyMap churn when creating a RegExpImp.


Factored important bits of regular expression implementation out of
RegExpImp (the JS object) and into RegExp (the PCRE wrapper class),
making RegExp a ref-counted class. (This will help later.)

Removed PCRE_POSIX support because I didn't quite know how to test it
and keep it working with these changes.


1.1% SunSpider speedup. 5.8% speedup on string-validate-input.js.

  • kjs/regexp.h: A few interface changes:
  1. Renamed "subpatterns()" => "numSubpatterns()"
  2. Made flag enumeration private and replaced it with public getters for specific flags.
  3. Made RegExp ref-counted so RegExps can be shared by RegExpImps.
  4. Made RegExp take a string of flags instead of an int, eliminating duplicated flag parsing code elsewhere.
  • kjs/regexp_object.cpp: (KJS::RegExpProtoFunc::callAsFunction): For RegExp.compile:
  • Fixed a bug where compile(undefined) would throw an exception.
  • Removed some now-redundant code.
  • Used RegExp sharing to eliminate an allocation and a bunch of PropertyMap thrash. (Not a big win since compile is a deprecated function. I mainly did this to test the plumbing.)

LayoutTests:

Reviewed by Darin Adler.


Beefed up the RegExp.compile testcase to cover a mistake in the
original check-in and a mistake I made while developing my new patch.

  • fast/js/resources/regexp-compile.js:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/kjs/regexp.cpp

    r27419 r27571  
    3131namespace KJS {
    3232
    33 RegExp::RegExp(const UString &p, int flags)
    34   : m_flags(flags), m_constructionError(0), m_numSubPatterns(0)
     33RegExp::RegExp(const UString& pattern)
     34  : m_refCount(0)
     35  , m_pattern(pattern)
     36  , m_flags(0)
     37  , m_constructionError(0)
     38  , m_numSubpatterns(0)
    3539{
    36 #if !USE(POSIX_REGEX)
     40    const char* errorMessage;
     41    m_regExp = jsRegExpCompile(reinterpret_cast<const JSRegExpChar*>(m_pattern.data()), m_pattern.size(), 0, &m_numSubpatterns, &errorMessage);
     42    if (!m_regExp)
     43        m_constructionError = strdup(errorMessage);
     44}
    3745
    38   int options = 0;
    39   if (flags & IgnoreCase)
    40     options |= JS_REGEXP_CASELESS;
    41   if (flags & Multiline)
    42     options |= JS_REGEXP_MULTILINE;
     46RegExp::RegExp(const UString& pattern, const UString& flags)
     47  : m_refCount(0)
     48  , m_pattern(pattern)
     49  , m_flags(0)
     50  , m_constructionError(0)
     51  , m_numSubpatterns(0)
     52{
     53    // NOTE: The global flag is handled on a case-by-case basis by functions like
     54    // String::match and RegExpImp::match.
     55    if (flags.find('g') != -1)
     56        m_flags |= Global;
    4357
    44   const char* errorMessage;
    45   m_regex = jsRegExpCompile(reinterpret_cast<const JSRegExpChar*>(p.data()), p.size(), options,
    46     &m_numSubPatterns, &errorMessage);
    47   if (!m_regex) {
    48     m_constructionError = strdup(errorMessage);
    49     return;
    50   }
     58    // FIXME: Eliminate duplication by adding a way ask a JSRegExp what its flags are.
     59    int options = 0;
     60    if (flags.find('i') != -1) {
     61        m_flags |= IgnoreCase;
     62        options |= JS_REGEXP_CASELESS;
     63    }
    5164
    52 #else /* USE(POSIX_REGEX) */
    53 
    54   int regflags = 0;
    55 #ifdef REG_EXTENDED
    56   regflags |= REG_EXTENDED;
    57 #endif
    58 #ifdef REG_ICASE
    59   if (flags & IgnoreCase)
    60     regflags |= REG_ICASE;
    61 #endif
    62 
    63   //NOTE: Multiline is not feasible with POSIX regex.
    64   //if ( f & Multiline )
    65   //    ;
    66   // Note: the Global flag is already handled by RegExpProtoFunc::execute
    67 
    68   // FIXME: support \u Unicode escapes.
    69 
    70   int errorCode = regcomp(&m_regex, p.ascii(), regflags);
    71   if (errorCode != 0) {
    72     char errorMessage[80];
    73     regerror(errorCode, &m_regex, errorMessage, sizeof errorMessage);
    74     m_constructionError = strdup(errorMessage);
    75   }
    76 
    77 #endif
     65    if (flags.find('m') != -1) {
     66        m_flags |= Multiline;
     67        options |= JS_REGEXP_MULTILINE;
     68    }
     69   
     70    const char* errorMessage;
     71    m_regExp = jsRegExpCompile(reinterpret_cast<const JSRegExpChar*>(m_pattern.data()), m_pattern.size(), options, &m_numSubpatterns, &errorMessage);
     72    if (!m_regExp)
     73        m_constructionError = strdup(errorMessage);
    7874}
    7975
    8076RegExp::~RegExp()
    8177{
    82 #if !USE(POSIX_REGEX)
    83   jsRegExpFree(m_regex);
    84 #else
    85   /* TODO: is this really okay after an error ? */
    86   regfree(&m_regex);
    87 #endif
     78  jsRegExpFree(m_regExp);
    8879  free(m_constructionError);
    8980}
     
    9990    return -1;
    10091
    101 #if !USE(POSIX_REGEX)
    102 
    103   if (!m_regex)
     92  if (!m_regExp)
    10493    return -1;
    10594
     
    113102    offsetVector = fixedSizeOffsetVector;
    114103  } else {
    115     offsetVectorSize = (m_numSubPatterns + 1) * 3;
     104    offsetVectorSize = (m_numSubpatterns + 1) * 3;
    116105    offsetVector = new int [offsetVectorSize];
    117106    ovector->set(offsetVector);
    118107  }
    119108
    120   int numMatches = jsRegExpExecute(m_regex, reinterpret_cast<const JSRegExpChar*>(s.data()), s.size(), i, offsetVector, offsetVectorSize);
     109  int numMatches = jsRegExpExecute(m_regExp, reinterpret_cast<const JSRegExpChar*>(s.data()), s.size(), i, offsetVector, offsetVectorSize);
    121110
    122111  if (numMatches < 0) {
     
    131120
    132121  return offsetVector[0];
    133 
    134 #else
    135 
    136   const unsigned maxMatch = 10;
    137   regmatch_t rmatch[maxMatch];
    138 
    139   char *str = strdup(s.ascii()); // TODO: why ???
    140   if (regexec(&m_regex, str + i, maxMatch, rmatch, 0)) {
    141     free(str);
    142     return UString::null();
    143   }
    144   free(str);
    145 
    146   if (!ovector) {
    147     *pos = rmatch[0].rm_so + i;
    148     return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so);
    149   }
    150 
    151   // map rmatch array to ovector used in PCRE case
    152   m_numSubPatterns = 0;
    153   for(unsigned j = 1; j < maxMatch && rmatch[j].rm_so >= 0 ; j++)
    154       m_numSubPatterns++;
    155   int ovecsize = (m_numSubPatterns+1)*3; // see above
    156   *ovector = new int[ovecsize];
    157   for (unsigned j = 0; j < m_numSubPatterns + 1; j++) {
    158     if (j>maxMatch)
    159       break;
    160     (*ovector)[2*j] = rmatch[j].rm_so + i;
    161     (*ovector)[2*j+1] = rmatch[j].rm_eo + i;
    162   }
    163 
    164   *pos = (*ovector)[0];
    165   return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
    166 
    167 #endif
    168122}
    169123
Note: See TracChangeset for help on using the changeset viewer.