* Returns -1 if no such index
*/
static int
-hlFirstIndex(HeadlineParsedText *prs, TSQuery query, int pos)
+hlFirstIndex(HeadlineParsedText *prs, int pos)
{
int i;
- /* For each word ... */
for (i = pos; i < prs->curwords; i++)
{
- /* ... scan the query to see if this word matches any operand */
- QueryItem *item = GETQUERY(query);
- int j;
-
- for (j = 0; j < query->size; j++)
- {
- if (item->type == QI_VAL &&
- prs->words[i].item == &item->qoperand)
- return i;
- item++;
- }
+ if (prs->words[i].item != NULL)
+ return i;
}
return -1;
}
/*
* hlCover: try to find a substring of prs' word list that satisfies query
*
- * At entry, *p must be the first word index to consider (initialize this to
- * zero, or to the next index after a previous successful search).
+ * At entry, *p must be the first word index to consider (initialize this
+ * to zero, or to the next index after a previous successful search).
+ * We will consider all substrings starting at or after that word, and
+ * containing no more than max_cover words. (We need a length limit to
+ * keep this from taking O(N^2) time for a long document with many query
+ * words but few complete matches. Actually, since checkcondition_HL is
+ * roughly O(N) in the length of the substring being checked, it's even
+ * worse than that.)
*
* On success, sets *p to first word index and *q to last word index of the
* cover substring, and returns true.
* words used in the query.
*/
static bool
-hlCover(HeadlineParsedText *prs, TSQuery query, int *p, int *q)
+hlCover(HeadlineParsedText *prs, TSQuery query, int max_cover,
+ int *p, int *q)
{
int pmin,
pmax,
* appearing in the query; there's no point in trying endpoints in between
* such points.
*/
- pmin = hlFirstIndex(prs, query, *p);
+ pmin = hlFirstIndex(prs, *p);
while (pmin >= 0)
{
/* This useless assignment just keeps stupider compilers quiet */
return true;
}
/* Nope, so advance pmax to next feasible endpoint */
- nextpmax = hlFirstIndex(prs, query, pmax + 1);
+ nextpmax = hlFirstIndex(prs, pmax + 1);
/*
* If this is our first advance past pmin, then the result is also
nextpmin = nextpmax;
pmax = nextpmax;
}
- while (pmax >= 0);
+ while (pmax >= 0 && pmax - pmin < max_cover);
/* No luck here, so try next feasible startpoint */
pmin = nextpmin;
}
static void
mark_hl_fragments(HeadlineParsedText *prs, TSQuery query, bool highlightall,
int shortword, int min_words,
- int max_words, int max_fragments)
+ int max_words, int max_fragments, int max_cover)
{
int32 poslen,
curlen,
covers = palloc(maxcovers * sizeof(CoverPos));
/* get all covers */
- while (hlCover(prs, query, &p, &q))
+ while (hlCover(prs, query, max_cover, &p, &q))
{
startpos = p;
endpos = q;
*/
static void
mark_hl_words(HeadlineParsedText *prs, TSQuery query, bool highlightall,
- int shortword, int min_words, int max_words)
+ int shortword, int min_words, int max_words, int max_cover)
{
int p = 0,
q = 0;
if (!highlightall)
{
/* examine all covers, select a headline using the best one */
- while (hlCover(prs, query, &p, &q))
+ while (hlCover(prs, query, max_cover, &p, &q))
{
/*
* Count words (curlen) and interesting words (poslen) within
int shortword = 3;
int max_fragments = 0;
bool highlightall = false;
+ int max_cover;
ListCell *l;
/* Extract configuration option values */
defel->defname)));
}
+ /*
+ * We might eventually make max_cover a user-settable parameter, but for
+ * now, just compute a reasonable value based on max_words and
+ * max_fragments.
+ */
+ max_cover = Max(max_words * 10, 100);
+ if (max_fragments > 0)
+ max_cover *= max_fragments;
+
/* in HighlightAll mode these parameters are ignored */
if (!highlightall)
{
/* Apply appropriate headline selector */
if (max_fragments == 0)
mark_hl_words(prs, query, highlightall, shortword,
- min_words, max_words);
+ min_words, max_words, max_cover);
else
mark_hl_fragments(prs, query, highlightall, shortword,
- min_words, max_words, max_fragments);
+ min_words, max_words, max_fragments, max_cover);
/* Fill in default values for string options */
if (!prs->startsel)