/*-------------------------------------------------------------------------
*
* like_match.c
- * like expression handling internal code.
+ * LIKE pattern matching internal code.
*
- * This file is included by like.c four times, to provide natching code for
- * single-byte encodings, UTF8, and for other multi-byte encodings,
- * and case insensitive matches for single byte encodings.
- * UTF8 is a special case because we can use a much more efficient version
- * of NextChar than can be used for other multi-byte encodings.
+ * This file is included by like.c four times, to provide matching code for
+ * (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings,
+ * and (4) case-insensitive matches in single-byte encodings.
+ * (UTF8 is a special case because we can use a much more efficient version
+ * of NextChar than can be used for general multi-byte encodings.)
*
* Before the inclusion, we need to define the following macros:
*
* NextChar
* MatchText - to name of function wanted
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
- * MATCH_LOWER - define iff using to_lower on text chars
+ * MATCH_LOWER - define for case (4), using tolower on single-byte chars
*
* Copyright (c) 1996-2008, PostgreSQL Global Development Group
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.20.2.3 2009/05/24 18:10:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.20.2.4 2010/05/28 17:35:36 tgl Exp $
*
*-------------------------------------------------------------------------
*/
*/
#ifdef MATCH_LOWER
-#define TCHAR(t) ((char) tolower((unsigned char) (t)))
+#define GETCHAR(t) ((char) tolower((unsigned char) (t)))
#else
-#define TCHAR(t) (t)
+#define GETCHAR(t) (t)
#endif
static int
{
if (*p == '\\')
{
- /* Next byte must match literally, whatever it is */
+ /* Next pattern byte must match literally, whatever it is */
NextByte(p, plen);
- if ((plen <= 0) || TCHAR(*p) != TCHAR(*t))
+ if (plen <= 0 || GETCHAR(*p) != GETCHAR(*t))
return LIKE_FALSE;
}
else if (*p == '%')
{
+ char firstpat;
+
/*
- * % processing is essentially a search for a match for what
- * follows the %, plus a recursive match of the remainder. We
- * succeed if and only if both conditions are met.
+ * % processing is essentially a search for a text position at
+ * which the remainder of the text matches the remainder of the
+ * pattern, using a recursive call to check each potential match.
+ *
+ * If there are wildcards immediately following the %, we can skip
+ * over them first, using the idea that any sequence of N _'s and
+ * one or more %'s is equivalent to N _'s and one % (ie, it will
+ * match any sequence of at least N text characters). In this
+ * way we will always run the recursive search loop using a
+ * pattern fragment that begins with a literal character-to-match,
+ * thereby not recursing more than we have to.
*/
+ NextByte(p, plen);
+
+ while (plen > 0)
+ {
+ if (*p == '%')
+ NextByte(p, plen);
+ else if (*p == '_')
+ {
+ /* If not enough text left to match the pattern, ABORT */
+ if (tlen <= 0)
+ return LIKE_ABORT;
+ NextChar(t, tlen);
+ NextByte(p, plen);
+ }
+ else
+ break; /* Reached a non-wildcard pattern char */
+ }
- /* %% is the same as % according to the SQL standard */
- /* Advance past all %'s */
- while (plen > 0 && *p == '%')
- NextByte(p, plen);
- /* Trailing percent matches everything. */
+ /*
+ * If we're at end of pattern, match: we have a trailing % which
+ * matches any remaining text string.
+ */
if (plen <= 0)
return LIKE_TRUE;
/*
* Otherwise, scan for a text position at which we can match the
- * rest of the pattern.
+ * rest of the pattern. The first remaining pattern char is known
+ * to be a regular or escaped literal character, so we can compare
+ * the first pattern byte to each text byte to avoid recursing
+ * more than we have to. This fact also guarantees that we don't
+ * have to consider a match to the zero-length substring at the
+ * end of the text.
*/
- if (*p == '_')
+ if (*p == '\\')
{
- /* %_ is the same as _% - avoid matching _ repeatedly */
+ if (plen < 2)
+ return LIKE_FALSE; /* XXX should throw error */
+ firstpat = GETCHAR(p[1]);
+ }
+ else
+ firstpat = GETCHAR(*p);
- do
- {
- NextChar(t, tlen);
- NextByte(p, plen);
- } while (tlen > 0 && plen > 0 && *p == '_');
-
- /*
- * If we are at the end of the pattern, succeed: % followed
- * by n _'s matches any string of at least n characters, and
- * we have now found there are at least n characters.
- */
- if (plen <= 0)
- return LIKE_TRUE;
-
- /* Look for a place that matches the rest of the pattern */
- while (tlen > 0)
+ while (tlen > 0)
+ {
+ if (GETCHAR(*t) == firstpat)
{
int matched = MatchText(t, tlen, p, plen);
if (matched != LIKE_FALSE)
- return matched; /* TRUE or ABORT */
-
- NextChar(t, tlen);
- }
- }
- else
- {
- char firstpat = TCHAR(*p);
-
- if (*p == '\\')
- {
- if (plen < 2)
- return LIKE_FALSE;
- firstpat = TCHAR(p[1]);
+ return matched; /* TRUE or ABORT */
}
- while (tlen > 0)
- {
- /*
- * Optimization to prevent most recursion: don't recurse
- * unless first pattern byte matches first text byte.
- */
- if (TCHAR(*t) == firstpat)
- {
- int matched = MatchText(t, tlen, p, plen);
-
- if (matched != LIKE_FALSE)
- return matched; /* TRUE or ABORT */
- }
-
- NextChar(t, tlen);
- }
+ NextChar(t, tlen);
}
/*
NextByte(p, plen);
continue;
}
- else if (TCHAR(*p) != TCHAR(*t))
+ else if (GETCHAR(*p) != GETCHAR(*t))
{
/* non-wildcard pattern char fails to match text char */
return LIKE_FALSE;
if (tlen > 0)
return LIKE_FALSE; /* end of pattern, but not of text */
- /* End of text string. Do we have matching pattern remaining? */
- while (plen > 0 && *p == '%') /* allow multiple %'s at end of pattern */
+ /*
+ * End of text, but perhaps not of pattern. Match iff the remaining
+ * pattern can match a zero-length string, ie, it's zero or more %'s.
+ */
+ while (plen > 0 && *p == '%')
NextByte(p, plen);
-
if (plen <= 0)
return LIKE_TRUE;
#undef do_like_escape
#endif
-#undef TCHAR
+#undef GETCHAR
#ifdef MATCH_LOWER
#undef MATCH_LOWER
+
#endif