1 | // -*- c-basic-offset: 2 -*-
|
---|
2 | /*
|
---|
3 | * This file is part of the KDE libraries
|
---|
4 | * Copyright (C) 1999-2001,2004 Harri Porten ([email protected])
|
---|
5 | *
|
---|
6 | * This library is free software; you can redistribute it and/or
|
---|
7 | * modify it under the terms of the GNU Lesser General Public
|
---|
8 | * License as published by the Free Software Foundation; either
|
---|
9 | * version 2 of the License, or (at your option) any later version.
|
---|
10 | *
|
---|
11 | * This library is distributed in the hope that it will be useful,
|
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
14 | * Lesser General Public License for more details.
|
---|
15 | *
|
---|
16 | * You should have received a copy of the GNU Lesser General Public
|
---|
17 | * License along with this library; if not, write to the Free Software
|
---|
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
---|
19 | *
|
---|
20 | */
|
---|
21 |
|
---|
22 | #include "config.h"
|
---|
23 | #include "regexp.h"
|
---|
24 |
|
---|
25 | #include "lexer.h"
|
---|
26 |
|
---|
27 | #include <assert.h>
|
---|
28 | #include <stdio.h>
|
---|
29 | #include <stdlib.h>
|
---|
30 | #include <string.h>
|
---|
31 |
|
---|
32 | namespace KJS {
|
---|
33 |
|
---|
34 | RegExp::RegExp(const UString &p, int flags)
|
---|
35 | : m_flags(flags), m_constructionError(0), m_numSubPatterns(0)
|
---|
36 | {
|
---|
37 | #if HAVE(PCREPOSIX)
|
---|
38 |
|
---|
39 | int options = PCRE_UTF8;
|
---|
40 | // Note: the Global flag is already handled by RegExpProtoFunc::execute.
|
---|
41 | // FIXME: That last comment is dubious. Not all RegExps get run through RegExpProtoFunc::execute.
|
---|
42 | if (flags & IgnoreCase)
|
---|
43 | options |= PCRE_CASELESS;
|
---|
44 | if (flags & Multiline)
|
---|
45 | options |= PCRE_MULTILINE;
|
---|
46 |
|
---|
47 | const char *errorMessage;
|
---|
48 | int errorOffset;
|
---|
49 |
|
---|
50 | m_regex = pcre_compile(reinterpret_cast<const uint16_t*>(p.data()), p.size(),
|
---|
51 | options, &errorMessage, &errorOffset, NULL);
|
---|
52 | if (!m_regex) {
|
---|
53 | m_constructionError = strdup(errorMessage);
|
---|
54 | return;
|
---|
55 | }
|
---|
56 |
|
---|
57 | #ifdef PCRE_INFO_CAPTURECOUNT
|
---|
58 | // Get number of subpatterns that will be returned.
|
---|
59 | pcre_fullinfo(m_regex, NULL, PCRE_INFO_CAPTURECOUNT, &m_numSubPatterns);
|
---|
60 | #endif
|
---|
61 |
|
---|
62 | #else /* HAVE(PCREPOSIX) */
|
---|
63 |
|
---|
64 | int regflags = 0;
|
---|
65 | #ifdef REG_EXTENDED
|
---|
66 | regflags |= REG_EXTENDED;
|
---|
67 | #endif
|
---|
68 | #ifdef REG_ICASE
|
---|
69 | if ( f & IgnoreCase )
|
---|
70 | regflags |= REG_ICASE;
|
---|
71 | #endif
|
---|
72 |
|
---|
73 | //NOTE: Multiline is not feasible with POSIX regex.
|
---|
74 | //if ( f & Multiline )
|
---|
75 | // ;
|
---|
76 | // Note: the Global flag is already handled by RegExpProtoFunc::execute
|
---|
77 |
|
---|
78 | // FIXME: support \u Unicode escapes.
|
---|
79 |
|
---|
80 | int errorCode = regcomp(&m_regex, intern.ascii(), regflags);
|
---|
81 | if (errorCode != 0) {
|
---|
82 | char errorMessage[80];
|
---|
83 | regerror(errorCode, &m_regex, errorMessage, sizeof errorMessage);
|
---|
84 | m_constructionError = strdup(errorMessage);
|
---|
85 | }
|
---|
86 |
|
---|
87 | #endif
|
---|
88 | }
|
---|
89 |
|
---|
90 | RegExp::~RegExp()
|
---|
91 | {
|
---|
92 | #if HAVE(PCREPOSIX)
|
---|
93 | pcre_free(m_regex);
|
---|
94 | #else
|
---|
95 | /* TODO: is this really okay after an error ? */
|
---|
96 | regfree(&m_regex);
|
---|
97 | #endif
|
---|
98 | free(m_constructionError);
|
---|
99 | }
|
---|
100 |
|
---|
101 | UString RegExp::match(const UString &s, int i, int *pos, int **ovector)
|
---|
102 | {
|
---|
103 | if (i < 0)
|
---|
104 | i = 0;
|
---|
105 | int dummyPos;
|
---|
106 | if (!pos)
|
---|
107 | pos = &dummyPos;
|
---|
108 | *pos = -1;
|
---|
109 | if (ovector)
|
---|
110 | *ovector = 0;
|
---|
111 |
|
---|
112 | if (i > s.size() || s.isNull())
|
---|
113 | return UString::null();
|
---|
114 |
|
---|
115 | #if HAVE(PCREPOSIX)
|
---|
116 |
|
---|
117 | if (!m_regex)
|
---|
118 | return UString::null();
|
---|
119 |
|
---|
120 | // Set up the offset vector for the result.
|
---|
121 | // First 2/3 used for result, the last third used by PCRE.
|
---|
122 | int *offsetVector;
|
---|
123 | int offsetVectorSize;
|
---|
124 | int fixedSizeOffsetVector[3];
|
---|
125 | if (!ovector) {
|
---|
126 | offsetVectorSize = 3;
|
---|
127 | offsetVector = fixedSizeOffsetVector;
|
---|
128 | } else {
|
---|
129 | offsetVectorSize = (m_numSubPatterns + 1) * 3;
|
---|
130 | offsetVector = new int [offsetVectorSize];
|
---|
131 | }
|
---|
132 |
|
---|
133 | const int numMatches = pcre_exec(m_regex, NULL, reinterpret_cast<const uint16_t *>(s.data()), s.size(), i, 0, offsetVector, offsetVectorSize);
|
---|
134 |
|
---|
135 | if (numMatches < 0) {
|
---|
136 | #ifndef NDEBUG
|
---|
137 | if (numMatches != PCRE_ERROR_NOMATCH)
|
---|
138 | fprintf(stderr, "KJS: pcre_exec() failed with result %d\n", numMatches);
|
---|
139 | #endif
|
---|
140 | if (offsetVector != fixedSizeOffsetVector)
|
---|
141 | delete [] offsetVector;
|
---|
142 | return UString::null();
|
---|
143 | }
|
---|
144 |
|
---|
145 | *pos = offsetVector[0];
|
---|
146 | if (ovector)
|
---|
147 | *ovector = offsetVector;
|
---|
148 | return s.substr(offsetVector[0], offsetVector[1] - offsetVector[0]);
|
---|
149 |
|
---|
150 | #else
|
---|
151 |
|
---|
152 | const unsigned maxMatch = 10;
|
---|
153 | regmatch_t rmatch[maxMatch];
|
---|
154 |
|
---|
155 | char *str = strdup(s.ascii()); // TODO: why ???
|
---|
156 | if (regexec(&m_regex, str + i, maxMatch, rmatch, 0)) {
|
---|
157 | free(str);
|
---|
158 | return UString::null();
|
---|
159 | }
|
---|
160 | free(str);
|
---|
161 |
|
---|
162 | if (!ovector) {
|
---|
163 | *pos = rmatch[0].rm_so + i;
|
---|
164 | return s.substr(rmatch[0].rm_so + i, rmatch[0].rm_eo - rmatch[0].rm_so);
|
---|
165 | }
|
---|
166 |
|
---|
167 | // map rmatch array to ovector used in PCRE case
|
---|
168 | m_numSubPatterns = 0;
|
---|
169 | for(unsigned j = 1; j < maxMatch && rmatch[j].rm_so >= 0 ; j++)
|
---|
170 | m_numSubPatterns++;
|
---|
171 | int ovecsize = (m_numSubPatterns+1)*3; // see above
|
---|
172 | *ovector = new int[ovecsize];
|
---|
173 | for (unsigned j = 0; j < m_numSubPatterns + 1; j++) {
|
---|
174 | if (j>maxMatch)
|
---|
175 | break;
|
---|
176 | (*ovector)[2*j] = rmatch[j].rm_so + i;
|
---|
177 | (*ovector)[2*j+1] = rmatch[j].rm_eo + i;
|
---|
178 | }
|
---|
179 |
|
---|
180 | *pos = (*ovector)[0];
|
---|
181 | return s.substr((*ovector)[0], (*ovector)[1] - (*ovector)[0]);
|
---|
182 |
|
---|
183 | #endif
|
---|
184 | }
|
---|
185 |
|
---|
186 | bool RegExp::isHexDigit(UChar uc)
|
---|
187 | {
|
---|
188 | int c = uc.unicode();
|
---|
189 | return (c >= '0' && c <= '9' ||
|
---|
190 | c >= 'a' && c <= 'f' ||
|
---|
191 | c >= 'A' && c <= 'F');
|
---|
192 | }
|
---|
193 |
|
---|
194 | unsigned char RegExp::convertHex(int c)
|
---|
195 | {
|
---|
196 | if (c >= '0' && c <= '9')
|
---|
197 | return static_cast<unsigned char>(c - '0');
|
---|
198 | if (c >= 'a' && c <= 'f')
|
---|
199 | return static_cast<unsigned char>(c - 'a' + 10);
|
---|
200 | return static_cast<unsigned char>(c - 'A' + 10);
|
---|
201 | }
|
---|
202 |
|
---|
203 | unsigned char RegExp::convertHex(int c1, int c2)
|
---|
204 | {
|
---|
205 | return ((convertHex(c1) << 4) + convertHex(c2));
|
---|
206 | }
|
---|
207 |
|
---|
208 | UChar RegExp::convertUnicode(UChar uc1, UChar uc2, UChar uc3, UChar uc4)
|
---|
209 | {
|
---|
210 | int c1 = uc1.unicode();
|
---|
211 | int c2 = uc2.unicode();
|
---|
212 | int c3 = uc3.unicode();
|
---|
213 | int c4 = uc4.unicode();
|
---|
214 | return UChar((convertHex(c1) << 4) + convertHex(c2),
|
---|
215 | (convertHex(c3) << 4) + convertHex(c4));
|
---|
216 | }
|
---|
217 |
|
---|
218 | } // namespace KJS
|
---|