Changeset 27807 in webkit for trunk/JavaScriptCore/pcre/pcre_compile.cpp
- Timestamp: Nov 14, 2007, 6:40:31 PM (18 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/JavaScriptCore/pcre/pcre_compile.cpp
r27805 r27807 2085 2085 2086 2086 static BOOL 2087 compile_regex(int options, int *brackets, uschar **codeptr,2088 const pcre_uchar **ptrptr, const pcre_uchar *patternEnd, ErrorCode* errorcodeptr, int skipbytes,2089 int *firstbyteptr, int *reqbyteptr, compile_data *cd)2087 compile_regex(int options, int* brackets, uschar** codeptr, 2088 const pcre_uchar** ptrptr, const pcre_uchar* patternEnd, ErrorCode* errorcodeptr, int skipbytes, 2089 int* firstbyteptr, int* reqbyteptr, compile_data* cd) 2090 2090 { 2091 const pcre_uchar *ptr = *ptrptr; 2092 uschar *code = *codeptr; 2093 uschar *last_branch = code; 2094 uschar *start_bracket = code; 2095 int firstbyte, reqbyte; 2096 int branchfirstbyte, branchreqbyte; 2097 2098 firstbyte = reqbyte = REQ_UNSET; 2099 2100 /* Offset is set zero to mark that this bracket is still open */ 2101 2102 PUT(code, 1, 0); 2103 code += 1 + LINK_SIZE + skipbytes; 2104 2105 /* Loop for each alternative branch */ 2106 2107 for (;;) 2108 { 2109 /* Now compile the branch */ 2110 2111 if (!compile_branch(options, brackets, &code, &ptr, patternEnd, errorcodeptr, 2112 &branchfirstbyte, &branchreqbyte, cd)) 2113 { 2114 *ptrptr = ptr; 2115 return false; 2091 const pcre_uchar* ptr = *ptrptr; 2092 uschar* code = *codeptr; 2093 uschar* last_branch = code; 2094 uschar* start_bracket = code; 2095 int firstbyte, reqbyte; 2096 int branchfirstbyte, branchreqbyte; 2097 2098 firstbyte = reqbyte = REQ_UNSET; 2099 2100 /* Offset is set zero to mark that this bracket is still open */ 2101 2102 PUT(code, 1, 0); 2103 code += 1 + LINK_SIZE + skipbytes; 2104 2105 /* Loop for each alternative branch */ 2106 2107 while (true) { 2108 /* Now compile the branch */ 2109 2110 if (!compile_branch(options, brackets, &code, &ptr, patternEnd, errorcodeptr, 2111 &branchfirstbyte, &branchreqbyte, cd)) { 2112 *ptrptr = ptr; 2113 return false; 2114 } 2115 2116 /* If this is the first branch, the firstbyte and reqbyte values for the 2117 branch become the values for the regex. 
*/ 2118 2119 if (*last_branch != OP_ALT) { 2120 firstbyte = branchfirstbyte; 2121 reqbyte = branchreqbyte; 2122 } 2123 2124 /* If this is not the first branch, the first char and reqbyte have to 2125 match the values from all the previous branches, except that if the previous 2126 value for reqbyte didn't have REQ_VARY set, it can still match, and we set 2127 REQ_VARY for the regex. */ 2128 2129 else { 2130 /* If we previously had a firstbyte, but it doesn't match the new branch, 2131 we have to abandon the firstbyte for the regex, but if there was previously 2132 no reqbyte, it takes on the value of the old firstbyte. */ 2133 2134 if (firstbyte >= 0 && firstbyte != branchfirstbyte) { 2135 if (reqbyte < 0) 2136 reqbyte = firstbyte; 2137 firstbyte = REQ_NONE; 2138 } 2139 2140 /* If we (now or from before) have no firstbyte, a firstbyte from the 2141 branch becomes a reqbyte if there isn't a branch reqbyte. */ 2142 2143 if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0) 2144 branchreqbyte = branchfirstbyte; 2145 2146 /* Now ensure that the reqbytes match */ 2147 2148 if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY)) 2149 reqbyte = REQ_NONE; 2150 else 2151 reqbyte |= branchreqbyte; /* To "or" REQ_VARY */ 2152 } 2153 2154 /* Reached end of expression, either ')' or end of pattern. Go back through 2155 the alternative branches and reverse the chain of offsets, with the field in 2156 the BRA item now becoming an offset to the first alternative. If there are 2157 no alternatives, it points to the end of the group. The length in the 2158 terminating ket is always the length of the whole bracketed item. If any of 2159 the ims options were changed inside the group, compile a resetting op-code 2160 following, except at the very end of the pattern. Return leaving the pointer 2161 at the terminating char. 
*/ 2162 2163 if (ptr >= patternEnd || *ptr != '|') { 2164 int length = code - last_branch; 2165 do { 2166 int prev_length = GET(last_branch, 1); 2167 PUT(last_branch, 1, length); 2168 length = prev_length; 2169 last_branch -= length; 2170 } while (length > 0); 2171 2172 /* Fill in the ket */ 2173 2174 *code = OP_KET; 2175 PUT(code, 1, code - start_bracket); 2176 code += 1 + LINK_SIZE; 2177 2178 /* Set values to pass back */ 2179 2180 *codeptr = code; 2181 *ptrptr = ptr; 2182 *firstbyteptr = firstbyte; 2183 *reqbyteptr = reqbyte; 2184 return true; 2185 } 2186 2187 /* Another branch follows; insert an "or" node. Its length field points back 2188 to the previous branch while the bracket remains open. At the end the chain 2189 is reversed. It's done like this so that the start of the bracket has a 2190 zero offset until it is closed, making it possible to detect recursion. */ 2191 2192 *code = OP_ALT; 2193 PUT(code, 1, code - last_branch); 2194 last_branch = code; 2195 code += 1 + LINK_SIZE; 2196 ptr++; 2116 2197 } 2117 2118 /* If this is the first branch, the firstbyte and reqbyte values for the 2119 branch become the values for the regex. */ 2120 2121 if (*last_branch != OP_ALT) 2122 { 2123 firstbyte = branchfirstbyte; 2124 reqbyte = branchreqbyte; 2125 } 2126 2127 /* If this is not the first branch, the first char and reqbyte have to 2128 match the values from all the previous branches, except that if the previous 2129 value for reqbyte didn't have REQ_VARY set, it can still match, and we set 2130 REQ_VARY for the regex. */ 2131 2132 else 2133 { 2134 /* If we previously had a firstbyte, but it doesn't match the new branch, 2135 we have to abandon the firstbyte for the regex, but if there was previously 2136 no reqbyte, it takes on the value of the old firstbyte. 
*/ 2137 2138 if (firstbyte >= 0 && firstbyte != branchfirstbyte) 2139 { 2140 if (reqbyte < 0) reqbyte = firstbyte; 2141 firstbyte = REQ_NONE; 2142 } 2143 2144 /* If we (now or from before) have no firstbyte, a firstbyte from the 2145 branch becomes a reqbyte if there isn't a branch reqbyte. */ 2146 2147 if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0) 2148 branchreqbyte = branchfirstbyte; 2149 2150 /* Now ensure that the reqbytes match */ 2151 2152 if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY)) 2153 reqbyte = REQ_NONE; 2154 else reqbyte |= branchreqbyte; /* To "or" REQ_VARY */ 2155 } 2156 2157 /* Reached end of expression, either ')' or end of pattern. Go back through 2158 the alternative branches and reverse the chain of offsets, with the field in 2159 the BRA item now becoming an offset to the first alternative. If there are 2160 no alternatives, it points to the end of the group. The length in the 2161 terminating ket is always the length of the whole bracketed item. If any of 2162 the ims options were changed inside the group, compile a resetting op-code 2163 following, except at the very end of the pattern. Return leaving the pointer 2164 at the terminating char. */ 2165 2166 if (ptr >= patternEnd || *ptr != '|') 2167 { 2168 int length = code - last_branch; 2169 do 2170 { 2171 int prev_length = GET(last_branch, 1); 2172 PUT(last_branch, 1, length); 2173 length = prev_length; 2174 last_branch -= length; 2175 } 2176 while (length > 0); 2177 2178 /* Fill in the ket */ 2179 2180 *code = OP_KET; 2181 PUT(code, 1, code - start_bracket); 2182 code += 1 + LINK_SIZE; 2183 2184 /* Set values to pass back */ 2185 2186 *codeptr = code; 2187 *ptrptr = ptr; 2188 *firstbyteptr = firstbyte; 2189 *reqbyteptr = reqbyte; 2190 return true; 2191 } 2192 2193 /* Another branch follows; insert an "or" node. Its length field points back 2194 to the previous branch while the bracket remains open. At the end the chain 2195 is reversed. 
It's done like this so that the start of the bracket has a 2196 zero offset until it is closed, making it possible to detect recursion. */ 2197 2198 *code = OP_ALT; 2199 PUT(code, 1, code - last_branch); 2200 last_branch = code; 2201 code += 1 + LINK_SIZE; 2202 ptr++; 2203 } 2204 /* Control never reaches here */ 2198 ASSERT_NOT_REACHED(); 2205 2199 } 2206 2207 2208 2200 2209 2201
Note: See TracChangeset for help on using the changeset viewer.