Ignore:
Timestamp:
Nov 14, 2007, 6:40:31 PM (18 years ago)
Author:
[email protected]
Message:

2007-11-14 Eric Seidel <[email protected]>

Reviewed by Oliver.

More PCRE style cleanup.

  • pcre/pcre_compile.cpp: (compile_regex):
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/JavaScriptCore/pcre/pcre_compile.cpp

    r27805 r27807  
    20852085
    20862086static BOOL
    2087 compile_regex(int options, int *brackets, uschar **codeptr,
    2088   const pcre_uchar **ptrptr, const pcre_uchar *patternEnd, ErrorCode* errorcodeptr, int skipbytes,
    2089   int *firstbyteptr, int *reqbyteptr, compile_data *cd)
     2087compile_regex(int options, int* brackets, uschar** codeptr,
     2088              const pcre_uchar** ptrptr, const pcre_uchar* patternEnd, ErrorCode* errorcodeptr, int skipbytes,
     2089              int* firstbyteptr, int* reqbyteptr, compile_data* cd)
    20902090{
    2091 const pcre_uchar *ptr = *ptrptr;
    2092 uschar *code = *codeptr;
    2093 uschar *last_branch = code;
    2094 uschar *start_bracket = code;
    2095 int firstbyte, reqbyte;
    2096 int branchfirstbyte, branchreqbyte;
    2097 
    2098 firstbyte = reqbyte = REQ_UNSET;
    2099 
    2100 /* Offset is set zero to mark that this bracket is still open */
    2101 
    2102 PUT(code, 1, 0);
    2103 code += 1 + LINK_SIZE + skipbytes;
    2104 
    2105 /* Loop for each alternative branch */
    2106 
    2107 for (;;)
    2108   {
    2109   /* Now compile the branch */
    2110 
    2111   if (!compile_branch(options, brackets, &code, &ptr, patternEnd, errorcodeptr,
    2112         &branchfirstbyte, &branchreqbyte, cd))
    2113     {
    2114     *ptrptr = ptr;
    2115     return false;
     2091    const pcre_uchar* ptr = *ptrptr;
     2092    uschar* code = *codeptr;
     2093    uschar* last_branch = code;
     2094    uschar* start_bracket = code;
     2095    int firstbyte, reqbyte;
     2096    int branchfirstbyte, branchreqbyte;
     2097   
     2098    firstbyte = reqbyte = REQ_UNSET;
     2099   
     2100    /* Offset is set zero to mark that this bracket is still open */
     2101   
     2102    PUT(code, 1, 0);
     2103    code += 1 + LINK_SIZE + skipbytes;
     2104   
     2105    /* Loop for each alternative branch */
     2106   
     2107    while (true) {
     2108        /* Now compile the branch */
     2109       
     2110        if (!compile_branch(options, brackets, &code, &ptr, patternEnd, errorcodeptr,
     2111                            &branchfirstbyte, &branchreqbyte, cd)) {
     2112            *ptrptr = ptr;
     2113            return false;
     2114        }
     2115       
     2116        /* If this is the first branch, the firstbyte and reqbyte values for the
     2117         branch become the values for the regex. */
     2118       
     2119        if (*last_branch != OP_ALT) {
     2120            firstbyte = branchfirstbyte;
     2121            reqbyte = branchreqbyte;
     2122        }
     2123       
     2124        /* If this is not the first branch, the first char and reqbyte have to
     2125         match the values from all the previous branches, except that if the previous
     2126         value for reqbyte didn't have REQ_VARY set, it can still match, and we set
     2127         REQ_VARY for the regex. */
     2128       
     2129        else {
     2130            /* If we previously had a firstbyte, but it doesn't match the new branch,
     2131             we have to abandon the firstbyte for the regex, but if there was previously
     2132             no reqbyte, it takes on the value of the old firstbyte. */
     2133           
     2134            if (firstbyte >= 0 && firstbyte != branchfirstbyte) {
     2135                if (reqbyte < 0)
     2136                    reqbyte = firstbyte;
     2137                firstbyte = REQ_NONE;
     2138            }
     2139           
     2140            /* If we (now or from before) have no firstbyte, a firstbyte from the
     2141             branch becomes a reqbyte if there isn't a branch reqbyte. */
     2142           
     2143            if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
     2144                branchreqbyte = branchfirstbyte;
     2145           
     2146            /* Now ensure that the reqbytes match */
     2147           
     2148            if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
     2149                reqbyte = REQ_NONE;
     2150            else
     2151                reqbyte |= branchreqbyte;   /* To "or" REQ_VARY */
     2152        }
     2153       
     2154        /* Reached end of expression, either ')' or end of pattern. Go back through
     2155         the alternative branches and reverse the chain of offsets, with the field in
     2156         the BRA item now becoming an offset to the first alternative. If there are
     2157         no alternatives, it points to the end of the group. The length in the
     2158         terminating ket is always the length of the whole bracketed item. If any of
     2159         the ims options were changed inside the group, compile a resetting op-code
     2160         following, except at the very end of the pattern. Return leaving the pointer
     2161         at the terminating char. */
     2162       
     2163        if (ptr >= patternEnd || *ptr != '|') {
     2164            int length = code - last_branch;
     2165            do {
     2166                int prev_length = GET(last_branch, 1);
     2167                PUT(last_branch, 1, length);
     2168                length = prev_length;
     2169                last_branch -= length;
     2170            } while (length > 0);
     2171           
     2172            /* Fill in the ket */
     2173           
     2174            *code = OP_KET;
     2175            PUT(code, 1, code - start_bracket);
     2176            code += 1 + LINK_SIZE;
     2177           
     2178            /* Set values to pass back */
     2179           
     2180            *codeptr = code;
     2181            *ptrptr = ptr;
     2182            *firstbyteptr = firstbyte;
     2183            *reqbyteptr = reqbyte;
     2184            return true;
     2185        }
     2186       
     2187        /* Another branch follows; insert an "or" node. Its length field points back
     2188         to the previous branch while the bracket remains open. At the end the chain
     2189         is reversed. It's done like this so that the start of the bracket has a
     2190         zero offset until it is closed, making it possible to detect recursion. */
     2191       
     2192        *code = OP_ALT;
     2193        PUT(code, 1, code - last_branch);
     2194        last_branch = code;
     2195        code += 1 + LINK_SIZE;
     2196        ptr++;
    21162197    }
    2117 
    2118   /* If this is the first branch, the firstbyte and reqbyte values for the
    2119   branch become the values for the regex. */
    2120 
    2121   if (*last_branch != OP_ALT)
    2122     {
    2123     firstbyte = branchfirstbyte;
    2124     reqbyte = branchreqbyte;
    2125     }
    2126 
    2127   /* If this is not the first branch, the first char and reqbyte have to
    2128   match the values from all the previous branches, except that if the previous
    2129   value for reqbyte didn't have REQ_VARY set, it can still match, and we set
    2130   REQ_VARY for the regex. */
    2131 
    2132   else
    2133     {
    2134     /* If we previously had a firstbyte, but it doesn't match the new branch,
    2135     we have to abandon the firstbyte for the regex, but if there was previously
    2136     no reqbyte, it takes on the value of the old firstbyte. */
    2137 
    2138     if (firstbyte >= 0 && firstbyte != branchfirstbyte)
    2139       {
    2140       if (reqbyte < 0) reqbyte = firstbyte;
    2141       firstbyte = REQ_NONE;
    2142       }
    2143 
    2144     /* If we (now or from before) have no firstbyte, a firstbyte from the
    2145     branch becomes a reqbyte if there isn't a branch reqbyte. */
    2146 
    2147     if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
    2148         branchreqbyte = branchfirstbyte;
    2149 
    2150     /* Now ensure that the reqbytes match */
    2151 
    2152     if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
    2153       reqbyte = REQ_NONE;
    2154     else reqbyte |= branchreqbyte;   /* To "or" REQ_VARY */
    2155     }
    2156 
    2157   /* Reached end of expression, either ')' or end of pattern. Go back through
    2158   the alternative branches and reverse the chain of offsets, with the field in
    2159   the BRA item now becoming an offset to the first alternative. If there are
    2160   no alternatives, it points to the end of the group. The length in the
    2161   terminating ket is always the length of the whole bracketed item. If any of
    2162   the ims options were changed inside the group, compile a resetting op-code
    2163   following, except at the very end of the pattern. Return leaving the pointer
    2164   at the terminating char. */
    2165 
    2166   if (ptr >= patternEnd || *ptr != '|')
    2167     {
    2168     int length = code - last_branch;
    2169     do
    2170       {
    2171       int prev_length = GET(last_branch, 1);
    2172       PUT(last_branch, 1, length);
    2173       length = prev_length;
    2174       last_branch -= length;
    2175       }
    2176     while (length > 0);
    2177 
    2178     /* Fill in the ket */
    2179 
    2180     *code = OP_KET;
    2181     PUT(code, 1, code - start_bracket);
    2182     code += 1 + LINK_SIZE;
    2183 
    2184     /* Set values to pass back */
    2185 
    2186     *codeptr = code;
    2187     *ptrptr = ptr;
    2188     *firstbyteptr = firstbyte;
    2189     *reqbyteptr = reqbyte;
    2190     return true;
    2191     }
    2192 
    2193   /* Another branch follows; insert an "or" node. Its length field points back
    2194   to the previous branch while the bracket remains open. At the end the chain
    2195   is reversed. It's done like this so that the start of the bracket has a
    2196   zero offset until it is closed, making it possible to detect recursion. */
    2197 
    2198   *code = OP_ALT;
    2199   PUT(code, 1, code - last_branch);
    2200   last_branch = code;
    2201   code += 1 + LINK_SIZE;
    2202   ptr++;
    2203   }
    2204 /* Control never reaches here */
     2198    ASSERT_NOT_REACHED();
    22052199}
    2206 
    2207 
    22082200
    22092201
Note: See TracChangeset for help on using the changeset viewer.