Limit the number of number of tapes used for a sort to 501.

author Robert Haas <[email protected]>

Tue, 15 Nov 2016 15:28:46 +0000 (10:28 -0500)

committer Robert Haas <[email protected]>

Tue, 15 Nov 2016 15:30:33 +0000 (10:30 -0500)
author Robert Haas <[email protected]>
Tue, 15 Nov 2016 15:28:46 +0000 (10:28 -0500)
committer Robert Haas <[email protected]>
Tue, 15 Nov 2016 15:30:33 +0000 (10:30 -0500)
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index 46512cfca1a4aca2bda4160f6322c8c92d4c8708..3ad81862f35f125d245838193c7297211f825205 100644 (file)
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -108,7 +108,8 @@
   * code we determine the number of tapes M on the basis of workMem: we want
   * workMem/M to be large enough that we read a fair amount of data each time
   * we preread from a tape, so as to maintain the locality of access described
- * above.  Nonetheless, with large workMem we can have many tapes.
+ * above.  Nonetheless, with large workMem we can have many tapes (but not
+ * too many -- see the comments in tuplesort_merge_order).
   *
   *
   * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
@@ -247,6 +248,7 @@ typedef enum
   * tape during a preread cycle (see discussion at top of file).
   */
  #define MINORDER       6       /* minimum merge order */
+#define MAXORDER       500     /* maximum merge order */
  #define TAPE_BUFFER_OVERHEAD       (BLCKSZ * 3)
  #define MERGE_BUFFER_SIZE          (BLCKSZ * 32)
  
@@ -2313,8 +2315,19 @@ tuplesort_merge_order(int64 allowedMem)
     mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) /
         (MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD);
  
-   /* Even in minimum memory, use at least a MINORDER merge */
+   /*
+    * Even in minimum memory, use at least a MINORDER merge.  On the other
+    * hand, even when we have lots of memory, do not use more than a MAXORDER
+    * merge.  Tapes are pretty cheap, but they're not entirely free.  Each
+    * additional tape reduces the amount of memory available to build runs,
+    * which in turn can cause the same sort to need more runs, which makes
+    * merging slower even if it can still be done in a single pass.  Also,
+    * high order merges are quite slow due to CPU cache effects; it can be
+    * faster to pay the I/O cost of a polyphase merge than to perform a single
+    * merge pass across many hundreds of tapes.
+    */
     mOrder = Max(mOrder, MINORDER);
+   mOrder = Min(mOrder, MAXORDER);
  
     return mOrder;
  }
author	Robert Haas <[email protected]>
	Tue, 15 Nov 2016 15:28:46 +0000 (10:28 -0500)
committer	Robert Haas <[email protected]>
	Tue, 15 Nov 2016 15:30:33 +0000 (10:30 -0500)