Ignore:
Timestamp:
Nov 4, 2007, 1:28:22 AM (18 years ago)
Author:
Darin Adler
Message:

Reviewed by Maciej.

SunSpider says it's 2.6% faster overall, 32.5% in the regular expression tests.

  • pcre/pcre_internal.h: Added OP_ASCII_CHAR and OP_ASCII_LETTER_NC.
  • pcre/pcre_compile.c: (find_fixedlength): Added cases for OP_ASCII_CHAR and OP_ASCII_LETTER_NC. Also added OP_NOT since there was no reason it should not be in here. (could_be_empty_branch): Ditto. (compile_branch): Streamlined all the single-character cases; there was a bit of duplicate code. Added cases for OP_ASCII_CHAR and OP_ASCII_LETTER_NC as needed. But in particular, compile to those opcodes when the single character match is ASCII. (find_firstassertedchar): Added cases for OP_ASCII_CHAR and OP_ASCII_LETTER_NC.
  • pcre/pcre_exec.c: (match): Removed the "min", "minimize", and "op" fields from the matchframe, after I discovered that none of them needed to be saved and restored across recursive match calls. Also eliminated the ignored result field from the matchframe, since I discovered that rrc ("recursive result code") was already the exact same thing. Moved the handling of opcodes higher than OP_BRA into the default statement of the switch instead of handling them before the switch. This removes a branch from each iteration of the opcode interpreter, just as removal of "op" removed at least one store from each iteration. Last, but not least, added cases for the OP_ASCII_CHAR and OP_ASCII_LETTER_NC opcodes. Neither can ever match a surrogate pair, and the case-insensitive letter comparison can be handled efficiently.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/pcre/pcre_exec.c

    r27421 r27422  
    217217
    218218
    219 #define RMATCH(num,rx,rb,rf,rg)\
     219#define RMATCH(num, ra, rb, rc)\
    220220  {\
    221221  if (frame >= stackframes && frame + 1 < stackframesend)\
     
    225225  frame->where = RMATCH_WHERE(num);\
    226226  newframe->eptr = frame->eptr;\
    227   newframe->ecode = rb;\
     227  newframe->ecode = (ra);\
    228228  newframe->offset_top = frame->offset_top;\
    229   newframe->eptrb = rf;\
    230   is_group_start = rg;\
     229  newframe->eptrb = (rb);\
     230  is_group_start = (rc);\
    231231  ++rdepth;\
    232232  newframe->prevframe = frame;\
     
    235235  goto HEAP_RECURSE;\
    236236RRETURN_##num:\
     237  --rdepth;\
    237238  DPRINTF(("did a goto back to line %d\n", __LINE__));\
    238   rx = result;\
    239   --rdepth;\
    240239  }
    241240
     
    248247  if (frame != NULL)\
    249248    {\
    250     result = ra;\
     249    rrc = (ra);\
    251250    goto RRETURN_LABEL;\
    252251    }\
    253252  return ra;\
    254253  }
    255 
    256254
    257255/* Structure for remembering the local variables in a private frame */
     
    275273  const pcre_uchar *saved_eptr;
    276274
    277   BOOL minimize;
    278 
    279275  int repeat_othercase;
    280276
     
    284280  int length;
    285281  int max;
    286   int min;
    287282  int number;
    288283  int offset;
    289   int op;
    290284  int save_offset1, save_offset2, save_offset3;
    291285
     
    319313same response.
    320314
    321 Performance note: It might be tempting to extract commonly used fields from the
    322 md structure (e.g. utf8, end_subject) into individual variables to improve
    323 performance. Tests using gcc on a SPARC disproved this; in the first case, it
    324 made performance worse.
    325 
    326315Arguments:
    327316   eptr        pointer in subject
     
    351340BOOL prev_is_word;
    352341BOOL is_group_start = TRUE;
     342int min;
     343BOOL minimize = FALSE; /* Initialization not really needed, but some compilers think so. */
    353344
    354345/* When recursion is not being used, all "local" variables that have to be
     
    365356matchframe *frame = stackframes;
    366357matchframe *newframe;
    367 int result;
    368358frame->prevframe = NULL;            /* Marks the top level */
    369359
     
    409399for (;;)
    410400  {
    411   frame->op = *frame->ecode;
    412   frame->minimize = FALSE;
    413 
    414   /* Opening capturing bracket. If there is space in the offset vector, save
    415   the current subject position in the working slot at the top of the vector. We
    416   mustn't change the current values of the data slot, because they may be set
    417   from a previous iteration of this group, and be referred to by a reference
    418   inside the group.
    419 
    420   If the bracket fails to match, we need to restore this value and also the
    421   values of the final offsets, in case they were set by a previous iteration of
    422   the same bracket.
    423 
    424   If there isn't enough space in the offset vector, treat this as if it were a
    425   non-capturing bracket. Don't worry about setting the flag for the error case
    426   here; that is handled in the code for KET. */
    427 
    428   if (frame->op > OP_BRA)
    429     {
    430     frame->number = frame->op - OP_BRA;
    431 
    432     /* For extended extraction brackets (large number), we have to fish out the
    433     number from a dummy opcode at the start. */
    434 
    435     if (frame->number > EXTRACT_BASIC_MAX)
    436       frame->number = GET2(frame->ecode, 2+LINK_SIZE);
    437     frame->offset = frame->number << 1;
    438 
    439 #ifdef DEBUG
    440     printf("start bracket %d subject=", frame->number);
    441     pchars(frame->eptr, 16, TRUE, md);
    442     printf("\n");
    443 #endif
    444 
    445     if (frame->offset < md->offset_max)
    446       {
    447       frame->save_offset1 = md->offset_vector[frame->offset];
    448       frame->save_offset2 = md->offset_vector[frame->offset+1];
    449       frame->save_offset3 = md->offset_vector[md->offset_end - frame->number];
    450 
    451       DPRINTF(("saving %d %d %d\n", frame->save_offset1, frame->save_offset2, frame->save_offset3));
    452       md->offset_vector[md->offset_end - frame->number] = frame->eptr - md->start_subject;
    453 
    454       do
    455         {
    456         RMATCH(1, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
    457         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    458         frame->ecode += GET(frame->ecode, 1);
    459         }
    460       while (*frame->ecode == OP_ALT);
    461 
    462       DPRINTF(("bracket %d failed\n", frame->number));
    463 
    464       md->offset_vector[frame->offset] = frame->save_offset1;
    465       md->offset_vector[frame->offset+1] = frame->save_offset2;
    466       md->offset_vector[md->offset_end - frame->number] = frame->save_offset3;
    467 
    468       RRETURN(MATCH_NOMATCH);
    469       }
    470 
    471     /* Insufficient room for saving captured contents */
    472 
    473     else frame->op = OP_BRA;
    474     }
    475 
    476   /* Other types of node can be handled by a switch */
    477 
    478   switch(frame->op)
     401  switch (*frame->ecode)
    479402    {
    480403    case OP_BRA:     /* Non-capturing bracket: optimized */
     404    NON_CAPTURING_BRACKET:
    481405    DPRINTF(("start bracket 0\n"));
    482406    do
    483407      {
    484       RMATCH(2, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
     408      RMATCH(2, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
    485409      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    486410      frame->ecode += GET(frame->ecode, 1);
     
    506430    do
    507431      {
    508       RMATCH(6, rrc, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
     432      RMATCH(6, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
    509433      if (rrc == MATCH_MATCH) break;
    510434      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     
    527451    do
    528452      {
    529       RMATCH(7, rrc, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
     453      RMATCH(7, frame->ecode + 1 + LINK_SIZE, NULL, match_isgroup);
    530454      if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
    531455      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     
    551475      do
    552476        {
    553         RMATCH(9, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
     477        RMATCH(9, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
    554478        if (rrc == MATCH_MATCH) break;
    555479        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     
    589513      if (*frame->ecode == OP_KETRMIN)
    590514        {
    591         RMATCH(10, rrc, frame->ecode + 1 + LINK_SIZE, frame->eptrb, 0);
     515        RMATCH(10, frame->ecode + 1 + LINK_SIZE, frame->eptrb, 0);
    592516        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    593         RMATCH(11, rrc, frame->prev, frame->eptrb, match_isgroup);
     517        RMATCH(11, frame->prev, frame->eptrb, match_isgroup);
    594518        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    595519        }
    596520      else  /* OP_KETRMAX */
    597521        {
    598         RMATCH(12, rrc, frame->prev, frame->eptrb, match_isgroup);
     522        RMATCH(12, frame->prev, frame->eptrb, match_isgroup);
    599523        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    600         RMATCH(13, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
     524        RMATCH(13, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
    601525        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    602526        }
     
    620544      {
    621545      frame->next = frame->ecode+1;
    622       RMATCH(14, rrc, frame->next, frame->eptrb, match_isgroup);
     546      RMATCH(14, frame->next, frame->eptrb, match_isgroup);
    623547      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    624548      do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT);
     
    631555      frame->next = frame->ecode+1;
    632556      do frame->next += GET(frame->next,1); while (*frame->next == OP_ALT);
    633       RMATCH(15, rrc, frame->next + 1+LINK_SIZE, frame->eptrb, match_isgroup);
     557      RMATCH(15, frame->next + 1+LINK_SIZE, frame->eptrb, match_isgroup);
    634558      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    635559      frame->ecode++;
     
    712636      if (*frame->ecode == OP_KETRMIN)
    713637        {
    714         RMATCH(16, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
     638        RMATCH(16, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
    715639        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    716         RMATCH(17, rrc, frame->prev, frame->eptrb, match_isgroup);
     640        RMATCH(17, frame->prev, frame->eptrb, match_isgroup);
    717641        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    718642        }
    719643      else  /* OP_KETRMAX */
    720644        {
    721         RMATCH(18, rrc, frame->prev, frame->eptrb, match_isgroup);
     645        RMATCH(18, frame->prev, frame->eptrb, match_isgroup);
    722646        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    723         RMATCH(19, rrc, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
     647        RMATCH(19, frame->ecode + 1+LINK_SIZE, frame->eptrb, 0);
    724648        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    725649        }
     
    904828        case OP_CRMINQUERY:
    905829        c = *frame->ecode++ - OP_CRSTAR;
    906         frame->minimize = (c & 1) != 0;
    907         frame->min = rep_min[c];                 /* Pick up values from tables; */
     830        minimize = (c & 1) != 0;
     831        min = rep_min[c];                 /* Pick up values from tables; */
    908832        frame->max = rep_max[c];                 /* zero for max => infinity */
    909833        if (frame->max == 0) frame->max = INT_MAX;
     
    912836        case OP_CRRANGE:
    913837        case OP_CRMINRANGE:
    914         frame->minimize = (*frame->ecode == OP_CRMINRANGE);
    915         frame->min = GET2(frame->ecode, 1);
     838        minimize = (*frame->ecode == OP_CRMINRANGE);
     839        min = GET2(frame->ecode, 1);
    916840        frame->max = GET2(frame->ecode, 3);
    917841        if (frame->max == 0) frame->max = INT_MAX;
     
    932856      /* First, ensure the minimum number of matches are present. */
    933857
    934       for (i = 1; i <= frame->min; i++)
     858      for (i = 1; i <= min; i++)
    935859        {
    936860        if (!match_ref(frame->offset, frame->eptr, frame->length, md)) RRETURN(MATCH_NOMATCH);
     
    941865      They are not both allowed to be zero. */
    942866
    943       if (frame->min == frame->max) continue;
     867      if (min == frame->max) continue;
    944868
    945869      /* If minimizing, keep trying and advancing the pointer */
    946870
    947       if (frame->minimize)
    948         {
    949         for (frame->fi = frame->min;; frame->fi++)
    950           {
    951           RMATCH(20, rrc, frame->ecode, frame->eptrb, 0);
     871      if (minimize)
     872        {
     873        for (frame->fi = min;; frame->fi++)
     874          {
     875          RMATCH(20, frame->ecode, frame->eptrb, 0);
    952876          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    953877          if (frame->fi >= frame->max || !match_ref(frame->offset, frame->eptr, frame->length, md))
     
    963887        {
    964888        frame->pp = frame->eptr;
    965         for (i = frame->min; i < frame->max; i++)
     889        for (i = min; i < frame->max; i++)
    966890          {
    967891          if (!match_ref(frame->offset, frame->eptr, frame->length, md)) break;
     
    970894        while (frame->eptr >= frame->pp)
    971895          {
    972           RMATCH(21, rrc, frame->ecode, frame->eptrb, 0);
     896          RMATCH(21, frame->ecode, frame->eptrb, 0);
    973897          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    974898          frame->eptr -= frame->length;
     
    1007931        case OP_CRMINQUERY:
    1008932        c = *frame->ecode++ - OP_CRSTAR;
    1009         frame->minimize = (c & 1) != 0;
    1010         frame->min = rep_min[c];                 /* Pick up values from tables; */
     933        minimize = (c & 1) != 0;
     934        min = rep_min[c];                 /* Pick up values from tables; */
    1011935        frame->max = rep_max[c];                 /* zero for max => infinity */
    1012936        if (frame->max == 0) frame->max = INT_MAX;
     
    1015939        case OP_CRRANGE:
    1016940        case OP_CRMINRANGE:
    1017         frame->minimize = (*frame->ecode == OP_CRMINRANGE);
    1018         frame->min = GET2(frame->ecode, 1);
     941        minimize = (*frame->ecode == OP_CRMINRANGE);
     942        min = GET2(frame->ecode, 1);
    1019943        frame->max = GET2(frame->ecode, 3);
    1020944        if (frame->max == 0) frame->max = INT_MAX;
     
    1023947
    1024948        default:               /* No repeat follows */
    1025         frame->min = frame->max = 1;
     949        min = frame->max = 1;
    1026950        break;
    1027951        }
     
    1030954
    1031955        {
    1032         for (i = 1; i <= frame->min; i++)
     956        for (i = 1; i <= min; i++)
    1033957          {
    1034958          if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     
    1036960          if (c > 255)
    1037961            {
    1038             if (frame->op == OP_CLASS) RRETURN(MATCH_NOMATCH);
     962            if (frame->data[-1] == OP_CLASS) RRETURN(MATCH_NOMATCH);
    1039963            }
    1040964          else
     
    1048972      need to recurse. */
    1049973
    1050       if (frame->min == frame->max) continue;
     974      if (min == frame->max) continue;
    1051975
    1052976      /* If minimizing, keep testing the rest of the expression and advancing
    1053977      the pointer while it matches the class. */
    1054978
    1055       if (frame->minimize)
    1056         {
    1057           {
    1058           for (frame->fi = frame->min;; frame->fi++)
    1059             {
    1060             RMATCH(22, rrc, frame->ecode, frame->eptrb, 0);
     979      if (minimize)
     980        {
     981          {
     982          for (frame->fi = min;; frame->fi++)
     983            {
     984            RMATCH(22, frame->ecode, frame->eptrb, 0);
    1061985            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    1062986            if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     
    1064988            if (c > 255)
    1065989              {
    1066               if (frame->op == OP_CLASS) RRETURN(MATCH_NOMATCH);
     990              if (frame->data[-1] == OP_CLASS) RRETURN(MATCH_NOMATCH);
    1067991              }
    1068992            else
     
    10821006
    10831007          {
    1084           for (i = frame->min; i < frame->max; i++)
     1008          for (i = min; i < frame->max; i++)
    10851009            {
    10861010            int len = 1;
     
    10891013            if (c > 255)
    10901014              {
    1091               if (frame->op == OP_CLASS) break;
     1015              if (frame->data[-1] == OP_CLASS) break;
    10921016              }
    10931017            else
     
    10991023          for (;;)
    11001024            {
    1101             RMATCH(24, rrc, frame->ecode, frame->eptrb, 0);
     1025            RMATCH(24, frame->ecode, frame->eptrb, 0);
    11021026            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    11031027            if (frame->eptr-- == frame->pp) break;        /* Stop if tried at original pos */
     
    11281052        case OP_CRMINQUERY:
    11291053        c = *frame->ecode++ - OP_CRSTAR;
    1130         frame->minimize = (c & 1) != 0;
    1131         frame->min = rep_min[c];                 /* Pick up values from tables; */
     1054        minimize = (c & 1) != 0;
     1055        min = rep_min[c];                 /* Pick up values from tables; */
    11321056        frame->max = rep_max[c];                 /* zero for max => infinity */
    11331057        if (frame->max == 0) frame->max = INT_MAX;
     
    11361060        case OP_CRRANGE:
    11371061        case OP_CRMINRANGE:
    1138         frame->minimize = (*frame->ecode == OP_CRMINRANGE);
    1139         frame->min = GET2(frame->ecode, 1);
     1062        minimize = (*frame->ecode == OP_CRMINRANGE);
     1063        min = GET2(frame->ecode, 1);
    11401064        frame->max = GET2(frame->ecode, 3);
    11411065        if (frame->max == 0) frame->max = INT_MAX;
     
    11441068
    11451069        default:               /* No repeat follows */
    1146         frame->min = frame->max = 1;
     1070        min = frame->max = 1;
    11471071        break;
    11481072        }
     
    11501074      /* First, ensure the minimum number of matches are present. */
    11511075
    1152       for (i = 1; i <= frame->min; i++)
     1076      for (i = 1; i <= min; i++)
    11531077        {
    11541078        if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     
    11601084      need to recurse. */
    11611085
    1162       if (frame->min == frame->max) continue;
     1086      if (min == frame->max) continue;
    11631087
    11641088      /* If minimizing, keep testing the rest of the expression and advancing
    11651089      the pointer while it matches the class. */
    11661090
    1167       if (frame->minimize)
    1168         {
    1169         for (frame->fi = frame->min;; frame->fi++)
    1170           {
    1171           RMATCH(26, rrc, frame->ecode, frame->eptrb, 0);
     1091      if (minimize)
     1092        {
     1093        for (frame->fi = min;; frame->fi++)
     1094          {
     1095          RMATCH(26, frame->ecode, frame->eptrb, 0);
    11721096          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    11731097          if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     
    11831107        {
    11841108        frame->pp = frame->eptr;
    1185         for (i = frame->min; i < frame->max; i++)
     1109        for (i = min; i < frame->max; i++)
    11861110          {
    11871111          int len = 1;
     
    11931117        for(;;)
    11941118          {
    1195           RMATCH(27, rrc, frame->ecode, frame->eptrb, 0);
     1119          RMATCH(27, frame->ecode, frame->eptrb, 0);
    11961120          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    11971121          if (frame->eptr-- == frame->pp) break;        /* Stop if tried at original pos */
     
    12771201    break;
    12781202
     1203    /* Match a single ASCII character. */
     1204
     1205    case OP_ASCII_CHAR:
     1206    if (md->end_subject == frame->eptr)
     1207      RRETURN(MATCH_NOMATCH);
     1208    if (*frame->eptr != frame->ecode[1])
     1209      RRETURN(MATCH_NOMATCH);
     1210    ++frame->eptr;
     1211    frame->ecode += 2;
     1212    break;
     1213
     1214    /* Match one of two cases of an ASCII character. */
     1215
     1216    case OP_ASCII_LETTER_NC:
     1217    if (md->end_subject == frame->eptr)
     1218      RRETURN(MATCH_NOMATCH);
     1219    if ((*frame->eptr | 0x20) != frame->ecode[1])
     1220      RRETURN(MATCH_NOMATCH);
     1221    ++frame->eptr;
     1222    frame->ecode += 2;
     1223    break;
     1224
    12791225    /* Match a single character repeatedly; different opcodes share code. */
    12801226
    12811227    case OP_EXACT:
    1282     frame->min = frame->max = GET2(frame->ecode, 1);
     1228    min = frame->max = GET2(frame->ecode, 1);
     1229    minimize = FALSE;
    12831230    frame->ecode += 3;
    12841231    goto REPEATCHAR;
     
    12861233    case OP_UPTO:
    12871234    case OP_MINUPTO:
    1288     frame->min = 0;
     1235    min = 0;
    12891236    frame->max = GET2(frame->ecode, 1);
    1290     frame->minimize = *frame->ecode == OP_MINUPTO;
     1237    minimize = *frame->ecode == OP_MINUPTO;
    12911238    frame->ecode += 3;
    12921239    goto REPEATCHAR;
     
    12991246    case OP_MINQUERY:
    13001247    c = *frame->ecode++ - OP_STAR;
    1301     frame->minimize = (c & 1) != 0;
    1302     frame->min = rep_min[c];                 /* Pick up values from tables; */
     1248    minimize = (c & 1) != 0;
     1249    min = rep_min[c];                 /* Pick up values from tables; */
    13031250    frame->max = rep_max[c];                 /* zero for max => infinity */
    13041251    if (frame->max == 0) frame->max = INT_MAX;
     
    13131260      GETUTF8CHARLEN(frame->fc, frame->ecode, frame->length);
    13141261      {
    1315       if (frame->min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
     1262      if (min * (frame->fc > 0xFFFF ? 2 : 1) > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
    13161263      frame->ecode += frame->length;
    13171264
     
    13201267        int othercase = md->caseless ? _pcre_ucp_othercase(frame->fc) : -1;
    13211268
    1322         for (i = 1; i <= frame->min; i++)
     1269        for (i = 1; i <= min; i++)
    13231270          {
    13241271          if (*frame->eptr != frame->fc && *frame->eptr != othercase) RRETURN(MATCH_NOMATCH);
     
    13261273          }
    13271274
    1328         if (frame->min == frame->max) continue;
    1329 
    1330         if (frame->minimize)
     1275        if (min == frame->max) continue;
     1276
     1277        if (minimize)
    13311278          {
    13321279          frame->repeat_othercase = othercase;
    1333           for (frame->fi = frame->min;; frame->fi++)
    1334             {
    1335             RMATCH(28, rrc, frame->ecode, frame->eptrb, 0);
     1280          for (frame->fi = min;; frame->fi++)
     1281            {
     1282            RMATCH(28, frame->ecode, frame->eptrb, 0);
    13361283            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    13371284            if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     
    13441291          {
    13451292          frame->pp = frame->eptr;
    1346           for (i = frame->min; i < frame->max; i++)
     1293          for (i = min; i < frame->max; i++)
    13471294            {
    13481295            if (frame->eptr >= md->end_subject) break;
     
    13521299          while (frame->eptr >= frame->pp)
    13531300           {
    1354            RMATCH(29, rrc, frame->ecode, frame->eptrb, 0);
     1301           RMATCH(29, frame->ecode, frame->eptrb, 0);
    13551302           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    13561303           --frame->eptr;
     
    13641311        /* No case on surrogate pairs, so no need to bother with "othercase". */
    13651312
    1366         for (i = 1; i <= frame->min; i++)
     1313        for (i = 1; i <= min; i++)
    13671314          {
    13681315          int nc;
     
    13721319          }
    13731320
    1374         if (frame->min == frame->max) continue;
    1375 
    1376         if (frame->minimize)
    1377           {
    1378           for (frame->fi = frame->min;; frame->fi++)
     1321        if (min == frame->max) continue;
     1322
     1323        if (minimize)
     1324          {
     1325          for (frame->fi = min;; frame->fi++)
    13791326            {
    13801327            int nc;
    1381             RMATCH(30, rrc, frame->ecode, frame->eptrb, 0);
     1328            RMATCH(30, frame->ecode, frame->eptrb, 0);
    13821329            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    13831330            if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     
    13911338          {
    13921339          frame->pp = frame->eptr;
    1393           for (i = frame->min; i < frame->max; i++)
     1340          for (i = min; i < frame->max; i++)
    13941341            {
    13951342            int nc;
     
    14011348          while (frame->eptr >= frame->pp)
    14021349           {
    1403            RMATCH(31, rrc, frame->ecode, frame->eptrb, 0);
     1350           RMATCH(31, frame->ecode, frame->eptrb, 0);
    14041351           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    14051352           frame->eptr -= 2;
     
    14391386
    14401387    case OP_NOTEXACT:
    1441     frame->min = frame->max = GET2(frame->ecode, 1);
     1388    min = frame->max = GET2(frame->ecode, 1);
     1389    minimize = FALSE;
    14421390    frame->ecode += 3;
    14431391    goto REPEATNOTCHAR;
     
    14451393    case OP_NOTUPTO:
    14461394    case OP_NOTMINUPTO:
    1447     frame->min = 0;
     1395    min = 0;
    14481396    frame->max = GET2(frame->ecode, 1);
    1449     frame->minimize = *frame->ecode == OP_NOTMINUPTO;
     1397    minimize = *frame->ecode == OP_NOTMINUPTO;
    14501398    frame->ecode += 3;
    14511399    goto REPEATNOTCHAR;
     
    14581406    case OP_NOTMINQUERY:
    14591407    c = *frame->ecode++ - OP_NOTSTAR;
    1460     frame->minimize = (c & 1) != 0;
    1461     frame->min = rep_min[c];                 /* Pick up values from tables; */
     1408    minimize = (c & 1) != 0;
     1409    min = rep_min[c];                 /* Pick up values from tables; */
    14621410    frame->max = rep_max[c];                 /* zero for max => infinity */
    14631411    if (frame->max == 0) frame->max = INT_MAX;
     
    14681416
    14691417    REPEATNOTCHAR:
    1470     if (frame->min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
     1418    if (min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
    14711419    frame->fc = *frame->ecode++;
    14721420
     
    14791427    characters and work backwards. */
    14801428
    1481     DPRINTF(("negative matching %c{%d,%d}\n", frame->fc, frame->min, frame->max));
     1429    DPRINTF(("negative matching %c{%d,%d}\n", frame->fc, min, frame->max));
    14821430
    14831431    if (md->caseless)
     
    14871435        {
    14881436        register int d;
    1489         for (i = 1; i <= frame->min; i++)
     1437        for (i = 1; i <= min; i++)
    14901438          {
    14911439          GETCHARINC(d, frame->eptr);
     
    14951443        }
    14961444
    1497       if (frame->min == frame->max) continue;
    1498 
    1499       if (frame->minimize)
     1445      if (min == frame->max) continue;
     1446
     1447      if (minimize)
    15001448        {
    15011449          {
    15021450          register int d;
    1503           for (frame->fi = frame->min;; frame->fi++)
    1504             {
    1505             RMATCH(38, rrc, frame->ecode, frame->eptrb, 0);
     1451          for (frame->fi = min;; frame->fi++)
     1452            {
     1453            RMATCH(38, frame->ecode, frame->eptrb, 0);
    15061454            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    15071455            GETCHARINC(d, frame->eptr);
     
    15221470          {
    15231471          register int d;
    1524           for (i = frame->min; i < frame->max; i++)
     1472          for (i = min; i < frame->max; i++)
    15251473            {
    15261474            int len = 1;
     
    15331481          for(;;)
    15341482            {
    1535             RMATCH(40, rrc, frame->ecode, frame->eptrb, 0);
     1483            RMATCH(40, frame->ecode, frame->eptrb, 0);
    15361484            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    15371485            if (frame->eptr-- == frame->pp) break;        /* Stop if tried at original pos */
     
    15511499        {
    15521500        register int d;
    1553         for (i = 1; i <= frame->min; i++)
     1501        for (i = 1; i <= min; i++)
    15541502          {
    15551503          GETCHARINC(d, frame->eptr);
     
    15581506        }
    15591507
    1560       if (frame->min == frame->max) continue;
    1561 
    1562       if (frame->minimize)
     1508      if (min == frame->max) continue;
     1509
     1510      if (minimize)
    15631511        {
    15641512          {
    15651513          register int d;
    1566           for (frame->fi = frame->min;; frame->fi++)
    1567             {
    1568             RMATCH(42, rrc, frame->ecode, frame->eptrb, 0);
     1514          for (frame->fi = min;; frame->fi++)
     1515            {
     1516            RMATCH(42, frame->ecode, frame->eptrb, 0);
    15691517            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    15701518            GETCHARINC(d, frame->eptr);
     
    15841532          {
    15851533          register int d;
    1586           for (i = frame->min; i < frame->max; i++)
     1534          for (i = min; i < frame->max; i++)
    15871535            {
    15881536            int len = 1;
     
    15941542          for(;;)
    15951543            {
    1596             RMATCH(44, rrc, frame->ecode, frame->eptrb, 0);
     1544            RMATCH(44, frame->ecode, frame->eptrb, 0);
    15971545            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    15981546            if (frame->eptr-- == frame->pp) break;        /* Stop if tried at original pos */
     
    16111559
    16121560    case OP_TYPEEXACT:
    1613     frame->min = frame->max = GET2(frame->ecode, 1);
    1614     frame->minimize = TRUE;
     1561    min = frame->max = GET2(frame->ecode, 1);
     1562    minimize = TRUE;
    16151563    frame->ecode += 3;
    16161564    goto REPEATTYPE;
     
    16181566    case OP_TYPEUPTO:
    16191567    case OP_TYPEMINUPTO:
    1620     frame->min = 0;
     1568    min = 0;
    16211569    frame->max = GET2(frame->ecode, 1);
    1622     frame->minimize = *frame->ecode == OP_TYPEMINUPTO;
     1570    minimize = *frame->ecode == OP_TYPEMINUPTO;
    16231571    frame->ecode += 3;
    16241572    goto REPEATTYPE;
     
    16311579    case OP_TYPEMINQUERY:
    16321580    c = *frame->ecode++ - OP_TYPESTAR;
    1633     frame->minimize = (c & 1) != 0;
    1634     frame->min = rep_min[c];                 /* Pick up values from tables; */
     1581    minimize = (c & 1) != 0;
     1582    min = rep_min[c];                 /* Pick up values from tables; */
    16351583    frame->max = rep_max[c];                 /* zero for max => infinity */
    16361584    if (frame->max == 0) frame->max = INT_MAX;
     
    16511599    and single-bytes. */
    16521600
    1653     if (frame->min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
    1654     if (frame->min > 0)
     1601    if (min > md->end_subject - frame->eptr) RRETURN(MATCH_NOMATCH);
     1602    if (min > 0)
    16551603      {
    16561604      switch(frame->ctype)
    16571605        {
    16581606        case OP_ANY:
    1659         for (i = 1; i <= frame->min; i++)
     1607        for (i = 1; i <= min; i++)
    16601608          {
    16611609          if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr))
     
    16671615
    16681616        case OP_NOT_DIGIT:
    1669         for (i = 1; i <= frame->min; i++)
     1617        for (i = 1; i <= min; i++)
    16701618          {
    16711619          if (frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     
    16771625
    16781626        case OP_DIGIT:
    1679         for (i = 1; i <= frame->min; i++)
     1627        for (i = 1; i <= min; i++)
    16801628          {
    16811629          if (frame->eptr >= md->end_subject ||
     
    16871635
    16881636        case OP_NOT_WHITESPACE:
    1689         for (i = 1; i <= frame->min; i++)
     1637        for (i = 1; i <= min; i++)
    16901638          {
    16911639          if (frame->eptr >= md->end_subject ||
     
    16971645
    16981646        case OP_WHITESPACE:
    1699         for (i = 1; i <= frame->min; i++)
     1647        for (i = 1; i <= min; i++)
    17001648          {
    17011649          if (frame->eptr >= md->end_subject ||
     
    17071655
    17081656        case OP_NOT_WORDCHAR:
    1709         for (i = 1; i <= frame->min; i++)
     1657        for (i = 1; i <= min; i++)
    17101658          {
    17111659          if (frame->eptr >= md->end_subject ||
     
    17171665
    17181666        case OP_WORDCHAR:
    1719         for (i = 1; i <= frame->min; i++)
     1667        for (i = 1; i <= min; i++)
    17201668          {
    17211669          if (frame->eptr >= md->end_subject ||
     
    17331681    /* If min = max, continue at the same level without recursing */
    17341682
    1735     if (frame->min == frame->max) continue;
     1683    if (min == frame->max) continue;
    17361684
    17371685    /* If minimizing, we have to test the rest of the pattern before each
    1738     subsequent match. Again, separate the UTF-8 case for speed, and also
    1739     separate the UCP cases. */
    1740 
    1741     if (frame->minimize)
    1742       {
    1743         {
    1744         for (frame->fi = frame->min;; frame->fi++)
    1745           {
    1746           RMATCH(48, rrc, frame->ecode, frame->eptrb, 0);
     1686    subsequent match. */
     1687
     1688    if (minimize)
     1689      {
     1690        {
     1691        for (frame->fi = min;; frame->fi++)
     1692          {
     1693          RMATCH(48, frame->ecode, frame->eptrb, 0);
    17471694          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    17481695          if (frame->fi >= frame->max || frame->eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
     
    17941741
    17951742    /* If maximizing it is worth using inline code for speed, doing the type
    1796     test once at the start (i.e. keep it out of the loop). Again, keep the
    1797     UTF-8 and UCP stuff separate. */
     1743    test once at the start (i.e. keep it out of the loop). */
    17981744
    17991745    else
     
    18131759            {
    18141760              {
    1815               for (i = frame->min; i < frame->max; i++)
     1761              for (i = min; i < frame->max; i++)
    18161762                {
    18171763                if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
     
    18271773            {
    18281774              {
    1829               for (i = frame->min; i < frame->max; i++)
     1775              for (i = min; i < frame->max; i++)
    18301776                {
    18311777                if (frame->eptr >= md->end_subject || IS_NEWLINE(*frame->eptr)) break;
     
    18381784
    18391785          case OP_NOT_DIGIT:
    1840           for (i = frame->min; i < frame->max; i++)
     1786          for (i = min; i < frame->max; i++)
    18411787            {
    18421788            int len = 1;
     
    18491795
    18501796          case OP_DIGIT:
    1851           for (i = frame->min; i < frame->max; i++)
     1797          for (i = min; i < frame->max; i++)
    18521798            {
    18531799            int len = 1;
     
    18601806
    18611807          case OP_NOT_WHITESPACE:
    1862           for (i = frame->min; i < frame->max; i++)
     1808          for (i = min; i < frame->max; i++)
    18631809            {
    18641810            int len = 1;
     
    18711817
    18721818          case OP_WHITESPACE:
    1873           for (i = frame->min; i < frame->max; i++)
     1819          for (i = min; i < frame->max; i++)
    18741820            {
    18751821            int len = 1;
     
    18821828
    18831829          case OP_NOT_WORDCHAR:
    1884           for (i = frame->min; i < frame->max; i++)
     1830          for (i = min; i < frame->max; i++)
    18851831            {
    18861832            int len = 1;
     
    18931839
    18941840          case OP_WORDCHAR:
    1895           for (i = frame->min; i < frame->max; i++)
     1841          for (i = min; i < frame->max; i++)
    18961842            {
    18971843            int len = 1;
     
    19111857        for(;;)
    19121858          {
    1913           RMATCH(52, rrc, frame->ecode, frame->eptrb, 0);
     1859          RMATCH(52, frame->ecode, frame->eptrb, 0);
    19141860          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
    19151861          if (frame->eptr-- == frame->pp) break;        /* Stop if tried at original pos */
     
    19231869      }
    19241870    /* Control never gets here */
     1871
     1872    default:
     1873      /* Opening capturing bracket. If there is space in the offset vector, save
     1874      the current subject position in the working slot at the top of the vector. We
     1875      mustn't change the current values of the data slot, because they may be set
     1876      from a previous iteration of this group, and be referred to by a reference
     1877      inside the group.
     1878
     1879      If the bracket fails to match, we need to restore this value and also the
     1880      values of the final offsets, in case they were set by a previous iteration of
     1881      the same bracket.
     1882
     1883      If there isn't enough space in the offset vector, treat this as if it were a
     1884      non-capturing bracket. Don't worry about setting the flag for the error case
     1885      here; that is handled in the code for KET. */
     1886
     1887      if (*frame->ecode > OP_BRA)
     1888        {
     1889        frame->number = *frame->ecode - OP_BRA;
     1890
     1891        /* For extended extraction brackets (large number), we have to fish out the
     1892        number from a dummy opcode at the start. */
     1893
     1894        if (frame->number > EXTRACT_BASIC_MAX)
     1895          frame->number = GET2(frame->ecode, 2+LINK_SIZE);
     1896        frame->offset = frame->number << 1;
     1897
     1898#ifdef DEBUG
     1899        printf("start bracket %d subject=", frame->number);
     1900        pchars(frame->eptr, 16, TRUE, md);
     1901        printf("\n");
     1902#endif
     1903
     1904        if (frame->offset < md->offset_max)
     1905          {
     1906          frame->save_offset1 = md->offset_vector[frame->offset];
     1907          frame->save_offset2 = md->offset_vector[frame->offset+1];
     1908          frame->save_offset3 = md->offset_vector[md->offset_end - frame->number];
     1909
     1910          DPRINTF(("saving %d %d %d\n", frame->save_offset1, frame->save_offset2, frame->save_offset3));
     1911          md->offset_vector[md->offset_end - frame->number] = frame->eptr - md->start_subject;
     1912
     1913          do
     1914            {
     1915            RMATCH(1, frame->ecode + 1 + LINK_SIZE, frame->eptrb, match_isgroup);
     1916            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     1917            frame->ecode += GET(frame->ecode, 1);
     1918            }
     1919          while (*frame->ecode == OP_ALT);
     1920
     1921          DPRINTF(("bracket %d failed\n", frame->number));
     1922
     1923          md->offset_vector[frame->offset] = frame->save_offset1;
     1924          md->offset_vector[frame->offset+1] = frame->save_offset2;
     1925          md->offset_vector[md->offset_end - frame->number] = frame->save_offset3;
     1926
     1927          RRETURN(MATCH_NOMATCH);
     1928          }
     1929
     1930        /* Insufficient room for saving captured contents */
     1931
     1932        goto NON_CAPTURING_BRACKET;
     1933        }
    19251934
    19261935    /* There's been some horrible disaster. Since all codes > OP_BRA are
     
    19291938    in the code above or the OP_xxx definitions. */
    19301939
    1931     default:
    19321940    DPRINTF(("Unknown opcode %d\n", *frame->ecode));
    19331941    RRETURN(JS_REGEXP_ERROR_INTERNAL);
Note: See TracChangeset for help on using the changeset viewer.