@@ -110,6 +110,18 @@ typedef struct TapeBlockTrailer
110
110
/*
 * Store the negated byte count in the 'next' field of the block's trailer.
 *
 * There must be no whitespace between the macro name and its parameter
 * list, otherwise the preprocessor treats this as an object-like macro.
 */
#define TapeBlockSetNBytes(buf, nbytes) \
	(TapeBlockGetTrailer(buf)->next = -(nbytes))
112
112
113
/*
 * When multiple tapes are being written to concurrently (as in HashAgg),
 * avoid excessive fragmentation by preallocating block numbers to individual
 * tapes. Each preallocation doubles in size starting at
 * TAPE_WRITE_PREALLOC_MIN blocks up to TAPE_WRITE_PREALLOC_MAX blocks.
 *
 * No filesystem operations are performed for preallocation; only the block
 * numbers are reserved. This may lead to sparse writes, which will cause
 * ltsWriteBlock() to fill in holes with zeros.
 */
#define TAPE_WRITE_PREALLOC_MIN 8
#define TAPE_WRITE_PREALLOC_MAX 128
113
125
114
126
/*
115
127
* This data structure represents a single "logical tape" within the set
@@ -151,6 +163,15 @@ typedef struct LogicalTape
151
163
int max_size ; /* highest useful, safe buffer_size */
152
164
int pos ; /* next read/write position in buffer */
153
165
int nbytes ; /* total # of valid bytes in buffer */
166
+
167
+ /*
168
+ * Preallocated block numbers are held in an array sorted in descending
169
+ * order; blocks are consumed from the end of the array (lowest block
170
+ * numbers first).
171
+ */
172
+ long * prealloc ;
173
+ int nprealloc ; /* number of elements in list */
174
+ int prealloc_size ; /* number of elements list can hold */
154
175
} LogicalTape ;
155
176
156
177
/*
@@ -198,6 +219,7 @@ struct LogicalTapeSet
198
219
static void ltsWriteBlock (LogicalTapeSet * lts , long blocknum , void * buffer );
199
220
static void ltsReadBlock (LogicalTapeSet * lts , long blocknum , void * buffer );
200
221
static long ltsGetFreeBlock (LogicalTapeSet * lts );
222
+ static long ltsGetPreallocBlock (LogicalTapeSet * lts , LogicalTape * lt );
201
223
static void ltsReleaseBlock (LogicalTapeSet * lts , long blocknum );
202
224
static void ltsConcatWorkerTapes (LogicalTapeSet * lts , TapeShare * shared ,
203
225
SharedFileSet * fileset );
@@ -397,6 +419,45 @@ ltsGetFreeBlock(LogicalTapeSet *lts)
397
419
return blocknum ;
398
420
}
399
421
422
+ /*
423
+ * Return the lowest free block number from the tape's preallocation list.
424
+ * Refill the preallocation list if necessary.
425
+ */
426
+ static long
427
+ ltsGetPreallocBlock (LogicalTapeSet * lts , LogicalTape * lt )
428
+ {
429
+ /* sorted in descending order, so return the last element */
430
+ if (lt -> nprealloc > 0 )
431
+ return lt -> prealloc [-- lt -> nprealloc ];
432
+
433
+ if (lt -> prealloc == NULL )
434
+ {
435
+ lt -> prealloc_size = TAPE_WRITE_PREALLOC_MIN ;
436
+ lt -> prealloc = (long * ) palloc (sizeof (long ) * lt -> prealloc_size );
437
+ }
438
+ else if (lt -> prealloc_size < TAPE_WRITE_PREALLOC_MAX )
439
+ {
440
+ /* when the preallocation list runs out, double the size */
441
+ lt -> prealloc_size *= 2 ;
442
+ if (lt -> prealloc_size > TAPE_WRITE_PREALLOC_MAX )
443
+ lt -> prealloc_size = TAPE_WRITE_PREALLOC_MAX ;
444
+ lt -> prealloc = (long * ) repalloc (lt -> prealloc ,
445
+ sizeof (long ) * lt -> prealloc_size );
446
+ }
447
+
448
+ /* refill preallocation list */
449
+ lt -> nprealloc = lt -> prealloc_size ;
450
+ for (int i = lt -> nprealloc ; i > 0 ; i -- )
451
+ {
452
+ lt -> prealloc [i - 1 ] = ltsGetFreeBlock (lts );
453
+
454
+ /* verify descending order */
455
+ Assert (i == lt -> nprealloc || lt -> prealloc [i - 1 ] > lt -> prealloc [i ]);
456
+ }
457
+
458
+ return lt -> prealloc [-- lt -> nprealloc ];
459
+ }
460
+
400
461
/*
401
462
* Return a block# to the freelist.
402
463
*/
@@ -557,6 +618,9 @@ ltsInitTape(LogicalTape *lt)
557
618
lt -> max_size = MaxAllocSize ;
558
619
lt -> pos = 0 ;
559
620
lt -> nbytes = 0 ;
621
+ lt -> prealloc = NULL ;
622
+ lt -> nprealloc = 0 ;
623
+ lt -> prealloc_size = 0 ;
560
624
}
561
625
562
626
/*
@@ -709,7 +773,7 @@ LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,
709
773
Assert (lt -> firstBlockNumber == -1 );
710
774
Assert (lt -> pos == 0 );
711
775
712
- lt -> curBlockNumber = ltsGetFreeBlock (lts );
776
+ lt -> curBlockNumber = ltsGetPreallocBlock (lts , lt );
713
777
lt -> firstBlockNumber = lt -> curBlockNumber ;
714
778
715
779
TapeBlockGetTrailer (lt -> buffer )-> prev = -1L ;
@@ -733,7 +797,7 @@ LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,
733
797
* First allocate the next block, so that we can store it in the
734
798
* 'next' pointer of this block.
735
799
*/
736
- nextBlockNumber = ltsGetFreeBlock (lts );
800
+ nextBlockNumber = ltsGetPreallocBlock (lts , lt );
737
801
738
802
/* set the next-pointer and dump the current block. */
739
803
TapeBlockGetTrailer (lt -> buffer )-> next = nextBlockNumber ;
@@ -835,13 +899,23 @@ LogicalTapeRewindForRead(LogicalTapeSet *lts, int tapenum, size_t buffer_size)
835
899
Assert (lt -> frozen );
836
900
}
837
901
838
- /* Allocate a read buffer (unless the tape is empty) */
839
902
if (lt -> buffer )
840
903
pfree (lt -> buffer );
841
904
842
905
/* the buffer is lazily allocated, but set the size here */
843
906
lt -> buffer = NULL ;
844
907
lt -> buffer_size = buffer_size ;
908
+
909
+ /* free the preallocation list, and return unused block numbers */
910
+ if (lt -> prealloc != NULL )
911
+ {
912
+ for (int i = lt -> nprealloc ; i > 0 ; i -- )
913
+ ltsReleaseBlock (lts , lt -> prealloc [i - 1 ]);
914
+ pfree (lt -> prealloc );
915
+ lt -> prealloc = NULL ;
916
+ lt -> nprealloc = 0 ;
917
+ lt -> prealloc_size = 0 ;
918
+ }
845
919
}
846
920
847
921
/*
0 commit comments