Fix a bug with building rtree_gist indexes.
author Tom Lane <[email protected]>
Tue, 28 May 2002 15:25:03 +0000 (15:25 +0000)
committer Tom Lane <[email protected]>
Tue, 28 May 2002 15:25:03 +0000 (15:25 +0000)
Patch from Teodor Sigaev.

contrib/rtree_gist/rtree_gist.c

index cbbe024c030eee556db52a81819ee57d0763d3e9..5ad261dc13cbba7561c9049fed86eac7fa0fd2a1 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- * $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/rtree_gist.c,v 1.4 2001/10/25 05:49:20 momjian Exp $
+ * $Header: /cvsroot/pgsql/contrib/rtree_gist/Attic/rtree_gist.c,v 1.4.2.1 2002/05/28 15:25:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -161,6 +161,22 @@ gbox_penalty(PG_FUNCTION_ARGS)
    PG_RETURN_POINTER(result);
 }
 
+typedef struct {
+   BOX     *key;
+   int     pos;
+} KBsort;
+
+static int
+compare_KB(const void* a, const void* b) {
+   BOX *abox = ((KBsort*)a)->key; 
+   BOX *bbox = ((KBsort*)b)->key;
+   float sa = (abox->high.x - abox->low.x) * (abox->high.y - abox->low.y);
+   float sb = (bbox->high.x - bbox->low.x) * (bbox->high.y - bbox->low.y);
+
+   if ( sa==sb ) return 0;
+   return ( sa>sb ) ? 1 : -1;
+}
+
 /*
 ** The GiST PickSplit method
 ** New linear algorithm, see 'New Linear Node Splitting Algorithm for R-tree',
@@ -201,26 +217,22 @@ gbox_picksplit(PG_FUNCTION_ARGS)
    for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i))
    {
        cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key);
-       if (pageunion.high.x < cur->high.x)
-       {
+       if ( allisequal == true &&  (
+               pageunion.high.x != cur->high.x || 
+               pageunion.high.y != cur->high.y || 
+               pageunion.low.x != cur->low.x || 
+               pageunion.low.y != cur->low.y 
+           ) ) 
            allisequal = false;
+       if (pageunion.high.x < cur->high.x)
            pageunion.high.x = cur->high.x;
-       }
        if (pageunion.low.x > cur->low.x)
-       {
-           allisequal = false;
            pageunion.low.x = cur->low.x;
-       }
        if (pageunion.high.y < cur->high.y)
-       {
-           allisequal = false;
            pageunion.high.y = cur->high.y;
-       }
        if (pageunion.low.y > cur->low.y)
-       {
-           allisequal = false;
            pageunion.low.y = cur->low.y;
-       }
    }
 
    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
@@ -264,7 +276,7 @@ gbox_picksplit(PG_FUNCTION_ARGS)
    unionB = (BOX *) palloc(sizeof(BOX));
    unionT = (BOX *) palloc(sizeof(BOX));
 
-#define ADDLIST( list, unionD, pos ) do { \
+#define ADDLIST( list, unionD, pos, num ) do { \
    if ( pos ) { \
        if ( unionD->high.x < cur->high.x ) unionD->high.x  = cur->high.x; \
        if ( unionD->low.x  > cur->low.x  ) unionD->low.x   = cur->low.x; \
@@ -273,7 +285,7 @@ gbox_picksplit(PG_FUNCTION_ARGS)
    } else { \
            memcpy( (void*)unionD, (void*) cur, sizeof( BOX ) );  \
    } \
-   list[pos] = i; \
+   list[pos] = num; \
    (pos)++; \
 } while(0)
 
@@ -281,17 +293,50 @@ gbox_picksplit(PG_FUNCTION_ARGS)
    {
        cur = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key);
        if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x)
-           ADDLIST(listL, unionL, posL);
+           ADDLIST(listL, unionL, posL,i);
        else
-           ADDLIST(listR, unionR, posR);
+           ADDLIST(listR, unionR, posR,i);
        if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y)
-           ADDLIST(listB, unionB, posB);
+           ADDLIST(listB, unionB, posB,i);
        else
-           ADDLIST(listT, unionT, posT);
+           ADDLIST(listT, unionT, posT,i);
    }
 
-   /* which split more optimal? */
+   /* bad disposition, sort by ascending and resplit */
+   if ( (posR==0 || posL==0) && (posT==0 || posB==0) ) {
+       KBsort *arr = (KBsort*)palloc( sizeof(KBsort) * maxoff );
+       posL = posR = posB = posT = 0;
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+           arr[i-1].key = DatumGetBoxP(((GISTENTRY *) VARDATA(entryvec))[i].key);
+           arr[i-1].pos = i;
+       }
+       qsort( arr, maxoff, sizeof(KBsort), compare_KB );
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+           cur = arr[i-1].key;
+           if (cur->low.x - pageunion.low.x < pageunion.high.x - cur->high.x)
+               ADDLIST(listL, unionL, posL,arr[i-1].pos);
+           else if ( cur->low.x - pageunion.low.x == pageunion.high.x - cur->high.x ) {
+               if ( posL>posR )
+                   ADDLIST(listR, unionR, posR,arr[i-1].pos);
+               else
+                   ADDLIST(listL, unionL, posL,arr[i-1].pos);
+           } else
+               ADDLIST(listR, unionR, posR,arr[i-1].pos);
+
+           if (cur->low.y - pageunion.low.y < pageunion.high.y - cur->high.y)
+               ADDLIST(listB, unionB, posB,arr[i-1].pos);
+           else if ( cur->low.y - pageunion.low.y == pageunion.high.y - cur->high.y ) {
+               if ( posB>posT )
+                   ADDLIST(listT, unionT, posT,arr[i-1].pos);
+               else
+                   ADDLIST(listB, unionB, posB,arr[i-1].pos);
+           } else
+               ADDLIST(listT, unionT, posT,arr[i-1].pos);
+       }
+       pfree(arr);
+   }
 
+   /* which split more optimal? */
    if (Max(posL, posR) < Max(posB, posT))
        direction = 'x';
    else if (Max(posL, posR) > Max(posB, posT))