Ignore:
Timestamp:
Oct 11, 2005, 1:43:49 PM (20 years ago)
Author:
ggaren
Message:
  • Implemented caching of match state inside the global RegExp object (lastParen, leftContext, rightContext, lastMatch, input).

exec(), test(), match(), search(), and replace() now dispatch regular
expression matching through the RegExp object's performMatch function,
to facilitate caching. This replaces registerRegexp and
setSubPatterns.

  • Implemented the special '$' aliases (e.g. RegExp.input aliases to RegExp.$_).
  • Moved support for backreferences into the new static hash table used for other special RegExp properties. Truncated backreferences at $9 to match IE, FF, and the "What's New in Netscape 1.2?" doc. (String.replace still supports double-digit backreferences.)
  • Tweaked RegExp.prototype.exec to handle ginormous values in lastIndex.

Fixes 11 -- count em, 11 -- JavaScriptCore tests.

Reviewed by NOBODY (OOPS!).

  • JavaScriptCore.xcodeproj/project.pbxproj: Added regexp_object.lut.h
  • kjs/create_hash_table: Tweaked to allow for more exotic characters.

We now rely on the compiler to catch illegal
identifiers.

  • kjs/regexp.cpp: (KJS::RegExp::RegExp):
  • kjs/regexp_object.cpp: (RegExpProtoFuncImp::callAsFunction): (RegExpObjectImp::RegExpObjectImp): (RegExpObjectImp::performMatch): (RegExpObjectImp::arrayOfMatches): (RegExpObjectImp::backrefGetter): (RegExpObjectImp::getLastMatch): (RegExpObjectImp::getLastParen): (RegExpObjectImp::getLeftContext): (RegExpObjectImp::getRightContext): (RegExpObjectImp::getOwnPropertySlot): (RegExpObjectImp::getValueProperty): (RegExpObjectImp::put): (RegExpObjectImp::putValueProperty):
  • kjs/regexp_object.h: (KJS::RegExpObjectImp::):
  • kjs/string_object.cpp: (substituteBackreferences): (replace): (StringProtoFuncImp::callAsFunction):
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/kjs/regexp_object.cpp

    r10701 r10818  
    2424#include "regexp_object.h"
    2525
     26#include "regexp_object.lut.h"
     27
    2628#include <stdio.h>
    2729#include "value.h"
     
    3335#include "regexp.h"
    3436#include "error_object.h"
     37#include "lookup.h"
    3538
    3639using namespace KJS;
     
    8083      }
    8184    }
     85   
    8286    return throwError(exec, TypeError);
    8387  }
    8488
    85   RegExpImp *reimp = static_cast<RegExpImp*>(thisObj);
    86   RegExp *re = reimp->regExp();
    87   UString s;
    88   UString str;
    8989  switch (id) {
    90   case Exec:      // 15.10.6.2
    91   case Test:
     90  case Test:      // 15.10.6.2
     91  case Exec:
    9292  {
    93     s = args[0]->toString(exec);
    94     int length = s.size();
    95     ValueImp *lastIndex = thisObj->get(exec,"lastIndex");
    96     int i = lastIndex->toInt32(exec);
    97     bool globalFlag = thisObj->get(exec,"global")->toBoolean(exec);
     93    RegExp *regExp = static_cast<RegExpImp*>(thisObj)->regExp();
     94    RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp());
     95
     96    UString input;
     97    if (args.isEmpty())
     98      input = regExpObj->get(exec, "input")->toString(exec);
     99    else
     100      input = args[0]->toString(exec);
     101
     102    double lastIndex = thisObj->get(exec, "lastIndex")->toInteger(exec);
     103
     104    bool globalFlag = thisObj->get(exec, "global")->toBoolean(exec);
    98105    if (!globalFlag)
    99       i = 0;
    100     if (i < 0 || i > length) {
    101       thisObj->put(exec,"lastIndex", Number(0), DontDelete | DontEnum);
    102       if (id == Test)
    103         return Boolean(false);
    104       else
    105         return Null();
    106     }
    107     RegExpObjectImp* regExpObj = static_cast<RegExpObjectImp*>(exec->lexicalInterpreter()->builtinRegExp());
    108     int **ovector = regExpObj->registerRegexp( re, s );
    109 
    110     str = re->match(s, i, 0L, ovector);
    111     regExpObj->setSubPatterns(re->subPatterns());
    112 
     106      lastIndex = 0;
     107    if (lastIndex < 0 || lastIndex > input.size()) {
     108      thisObj->put(exec, "lastIndex", jsZero(), DontDelete | DontEnum);
     109      return Null();
     110    }
     111
     112    UString match = regExpObj->performMatch(regExp, input, static_cast<int>(lastIndex));
     113    bool didMatch = !match.isNull();
     114
     115    // Test
    113116    if (id == Test)
    114       return Boolean(!str.isNull());
    115 
    116     if (str.isNull()) // no match
    117     {
     117      return Boolean(didMatch);
     118
     119    // Exec
     120    if (didMatch) {
    118121      if (globalFlag)
    119         thisObj->put(exec,"lastIndex",Number(0), DontDelete | DontEnum);
     122        thisObj->put(exec, "lastIndex", Number(lastIndex + match.size()), DontDelete | DontEnum);
     123      return regExpObj->arrayOfMatches(exec, match);
     124    } else {
     125      if (globalFlag)
     126        thisObj->put(exec, "lastIndex", jsZero(), DontDelete | DontEnum);
    120127      return Null();
    121     }
    122     else // success
    123     {
    124       if (globalFlag)
    125         thisObj->put(exec,"lastIndex",Number( (*ovector)[1] ), DontDelete | DontEnum);
    126       return regExpObj->arrayOfMatches(exec,str);
    127128    }
    128129  }
    129130  break;
    130131  case ToString:
    131     s = thisObj->get(exec,"source")->toString(exec);
    132     str = "/";
    133     str += s;
    134     str += "/";
    135     if (thisObj->get(exec,"global")->toBoolean(exec)) {
    136       str += "g";
    137     }
    138     if (thisObj->get(exec,"ignoreCase")->toBoolean(exec)) {
    139       str += "i";
    140     }
    141     if (thisObj->get(exec,"multiline")->toBoolean(exec)) {
    142       str += "m";
    143     }
    144     return String(str);
     132    UString result = "/" + thisObj->get(exec, "source")->toString(exec) + "/";
     133    if (thisObj->get(exec, "global")->toBoolean(exec)) {
     134      result += "g";
     135    }
     136    if (thisObj->get(exec, "ignoreCase")->toBoolean(exec)) {
     137      result += "i";
     138    }
     139    if (thisObj->get(exec, "multiline")->toBoolean(exec)) {
     140      result += "m";
     141    }
     142    return String(result);
    145143  }
    146144
     
    163161
    164162// ------------------------------ RegExpObjectImp ------------------------------
     163
     164const ClassInfo RegExpObjectImp::info = {"RegExp", &InternalFunctionImp::info, &RegExpTable, 0};
     165
     166/* Source for regexp_object.lut.h
     167@begin RegExpTable 20
     168  input           RegExpObjectImp::Input          None
     169  $_              RegExpObjectImp::Input          DontEnum
     170  multiline       RegExpObjectImp::Multiline      None
     171  $*              RegExpObjectImp::Multiline      DontEnum
     172  lastMatch       RegExpObjectImp::LastMatch      DontDelete|ReadOnly
     173  $&              RegExpObjectImp::LastMatch      DontDelete|ReadOnly|DontEnum
     174  lastParen       RegExpObjectImp::LastParen      DontDelete|ReadOnly
     175  $+              RegExpObjectImp::LastParen      DontDelete|ReadOnly|DontEnum
     176  leftContext     RegExpObjectImp::LeftContext    DontDelete|ReadOnly
     177  $`              RegExpObjectImp::LeftContext    DontDelete|ReadOnly|DontEnum
     178  rightContext    RegExpObjectImp::RightContext   DontDelete|ReadOnly
     179  $'              RegExpObjectImp::RightContext   DontDelete|ReadOnly|DontEnum
     180  $1              RegExpObjectImp::Dollar1        DontDelete|ReadOnly
     181  $2              RegExpObjectImp::Dollar2        DontDelete|ReadOnly
     182  $3              RegExpObjectImp::Dollar3        DontDelete|ReadOnly
     183  $4              RegExpObjectImp::Dollar4        DontDelete|ReadOnly
     184  $5              RegExpObjectImp::Dollar5        DontDelete|ReadOnly
     185  $6              RegExpObjectImp::Dollar6        DontDelete|ReadOnly
     186  $7              RegExpObjectImp::Dollar7        DontDelete|ReadOnly
     187  $8              RegExpObjectImp::Dollar8        DontDelete|ReadOnly
     188  $9              RegExpObjectImp::Dollar9        DontDelete|ReadOnly
     189@end
     190*/
    165191
    166192RegExpObjectImp::RegExpObjectImp(ExecState *exec,
     
    168194                                 RegExpPrototypeImp *regProto)
    169195
    170   : InternalFunctionImp(funcProto), lastOvector(0L), lastNrSubPatterns(0)
     196  : InternalFunctionImp(funcProto), multiline(false), lastInput(""), lastOvector(0), lastNumSubPatterns(0)
    171197{
    172198  // ECMA 15.10.5.1 RegExp.prototype
     
    182208}
    183209
    184 int **RegExpObjectImp::registerRegexp( const RegExp* re, const UString& s )
    185 {
    186   lastString = s;
    187   delete [] lastOvector;
    188   lastOvector = 0;
    189   lastNrSubPatterns = re->subPatterns();
    190   return &lastOvector;
     210/*
     211  To facilitate result caching, exec(), test(), match(), search(), and replace() dispatch regular
     212  expression matching through the performMatch function. We use cached results to calculate,
     213  e.g., RegExp.lastMatch and RegExp.lastParen.
     214*/
     215UString RegExpObjectImp::performMatch(RegExp* r, const UString& s, int startOffset, int *endOffset, int **ovector)
     216{
     217  int tmpOffset;
     218  int *tmpOvector;
     219  UString match = r->match(s, startOffset, &tmpOffset, &tmpOvector);
     220
     221  if (endOffset)
     222    *endOffset = tmpOffset;
     223  if (ovector)
     224    *ovector = tmpOvector;
     225 
     226  if (!match.isNull()) {
     227    assert(tmpOvector);
     228   
     229    lastInput = s;
     230    delete [] lastOvector;
     231    lastOvector = tmpOvector;
     232    lastNumSubPatterns = r->subPatterns();
     233  }
     234 
     235  return match;
    191236}
    192237
     
    197242  list.append(String(result));
    198243  if ( lastOvector )
    199     for ( unsigned i = 1 ; i < lastNrSubPatterns + 1 ; ++i )
     244    for ( unsigned i = 1 ; i < lastNumSubPatterns + 1 ; ++i )
    200245    {
    201246      int start = lastOvector[2*i];
     
    203248        list.append(jsUndefined());
    204249      else {
    205         UString substring = lastString.substr( start, lastOvector[2*i+1] - start );
     250        UString substring = lastInput.substr( start, lastOvector[2*i+1] - start );
    206251        list.append(String(substring));
    207252      }
     
    209254  ObjectImp *arr = exec->lexicalInterpreter()->builtinArray()->construct(exec, list);
    210255  arr->put(exec, "index", Number(lastOvector[0]));
    211   arr->put(exec, "input", String(lastString));
     256  arr->put(exec, "input", String(lastInput));
    212257  return arr;
    213258}
    214259
    215 ValueImp *RegExpObjectImp::backrefGetter(ExecState *exec, const Identifier& propertyName, const PropertySlot& slot)
    216 {
    217   RegExpObjectImp *thisObj = static_cast<RegExpObjectImp *>(slot.slotBase());
    218   unsigned i = slot.index();
    219 
    220   if (i < thisObj->lastNrSubPatterns + 1) {
    221     int *lastOvector = thisObj->lastOvector;
    222     UString substring = thisObj->lastString.substr(lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] );
     260ValueImp *RegExpObjectImp::getBackref(unsigned i) const
     261{
     262  if (lastOvector && i < lastNumSubPatterns + 1) {
     263    UString substring = lastInput.substr(lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i] );
    223264    return String(substring);
    224265  }
     
    227268}
    228269
     270ValueImp *RegExpObjectImp::getLastMatch() const
     271{
     272  if (lastOvector) {
     273    UString substring = lastInput.substr(lastOvector[0], lastOvector[1] - lastOvector[0]);
     274    return String(substring);
     275  }
     276 
     277  return String("");
     278}
     279
     280ValueImp *RegExpObjectImp::getLastParen() const
     281{
     282  int i = lastNumSubPatterns;
     283  if (i > 0) {
     284    assert(lastOvector);
     285    UString substring = lastInput.substr(lastOvector[2*i], lastOvector[2*i+1] - lastOvector[2*i]);
     286    return String(substring);
     287  }
     288   
     289  return String("");
     290}
     291
     292ValueImp *RegExpObjectImp::getLeftContext() const
     293{
     294  if (lastOvector) {
     295    UString substring = lastInput.substr(0, lastOvector[0]);
     296    return String(substring);
     297  }
     298 
     299  return String("");
     300}
     301
     302ValueImp *RegExpObjectImp::getRightContext() const
     303{
     304  if (lastOvector) {
     305    UString s = lastInput;
     306    UString substring = s.substr(lastOvector[1], s.size() - lastOvector[1]);
     307    return String(substring);
     308  }
     309 
     310  return String("");
     311}
     312
    229313bool RegExpObjectImp::getOwnPropertySlot(ExecState *exec, const Identifier& propertyName, PropertySlot& slot)
    230314{
    231   UString s = propertyName.ustring();
    232   if (s[0] == '$' && lastOvector)
    233   {
    234     bool ok;
    235     unsigned i = s.substr(1).toUInt32(&ok);
    236     if (ok) {
    237       slot.setCustomIndex(this, i, backrefGetter);
    238       return true;
    239     }
    240   }
    241 
    242   return InternalFunctionImp::getOwnPropertySlot(exec, propertyName, slot);
    243 }
    244 
     315  return getStaticValueSlot<RegExpObjectImp, InternalFunctionImp>(exec, &RegExpTable, this, propertyName, slot);
     316}
     317
     318ValueImp *RegExpObjectImp::getValueProperty(ExecState *exec, int token) const
     319{
     320  switch (token) {
     321    case Dollar1:
     322      return getBackref(1);
     323    case Dollar2:
     324      return getBackref(2);
     325    case Dollar3:
     326      return getBackref(3);
     327    case Dollar4:
     328      return getBackref(4);
     329    case Dollar5:
     330      return getBackref(5);
     331    case Dollar6:
     332      return getBackref(6);
     333    case Dollar7:
     334      return getBackref(7);
     335    case Dollar8:
     336      return getBackref(8);
     337    case Dollar9:
     338      return getBackref(9);
     339    case Input:
     340      return jsString(lastInput);
     341    case Multiline:
     342      return jsBoolean(multiline);
     343    case LastMatch:
     344      return getLastMatch();
     345    case LastParen:
     346      return getLastParen();
     347    case LeftContext:
     348      return getLeftContext();
     349    case RightContext:
     350      return getRightContext();
     351    default:
     352      assert(0);
     353  }
     354
     355  return String("");
     356}
     357
     358void RegExpObjectImp::put(ExecState *exec, const Identifier &propertyName, ValueImp *value, int attr)
     359{
     360  lookupPut<RegExpObjectImp, InternalFunctionImp>(exec, propertyName, value, attr, &RegExpTable, this);
     361}
     362
     363void RegExpObjectImp::putValueProperty(ExecState *exec, int token, ValueImp *value, int attr)
     364{
     365  switch (token) {
     366    case Input:
     367      lastInput = value->toString(exec);
     368      break;
     369    case Multiline:
     370      multiline = value->toBoolean(exec);
     371      break;
     372    default:
     373      assert(0);
     374  }
     375}
     376 
    245377bool RegExpObjectImp::implementsConstruct() const
    246378{
Note: See TracChangeset for help on using the changeset viewer.