(errcode(ERRCODE_SNAPSHOT_TOO_OLD),
errmsg("snapshot too old")));
}
+
+
+/*
+ * CheckBuffer
+ *
+ * Check the on-disk state of one block of a relation fork without loading
+ * it into the shared buffers.
+ *
+ * To avoid torn pages and possible false positives when reading data, the
+ * LWLock of the buffer mapping partition the block's tag hashes to is taken
+ * in shared mode, and the mapping table is consulted to see whether the page
+ * is in shared buffers or not.  The partition lock is held until the read
+ * from disk has completed.
+ *
+ * If the page is found in the shared buffers, the I/O lock of that buffer
+ * is additionally taken in shared mode before looking at its state:
+ *
+ * - If the page is dirty (or its buffer tag is not valid), it is ignored
+ *   and reported as valid, as it will be flushed to disk either before the
+ *   end of the next checkpoint or during recovery in the event of an unsafe
+ *   shutdown.
+ *
+ * - If the page is not dirty, the state of its data on disk is still
+ *   checked, as it could be possible that the page stayed in shared buffers
+ *   for a rather long time while the on-disk data got corrupted.  The read
+ *   is done with the I/O lock still held.
+ *
+ * If the page is not found in shared buffers, the block is read from disk
+ * while holding only the buffer mapping partition LWLock.
+ *
+ * The page data is stored in a private memory area local to this function
+ * while running the checks.  That area is a PGAlignedBlock rather than a
+ * plain char array, so that it is suitably aligned for being examined as a
+ * page.
+ *
+ * Returns true if the block was skipped or if PageIsVerifiedExtended()
+ * accepts the page read from disk, and false otherwise.  The fork must
+ * exist; this is only enforced with an assertion.
+ */
+bool
+CheckBuffer(SMgrRelation smgr, ForkNumber forknum, BlockNumber blkno)
+{
+	PGAlignedBlock buffer;		/* private, aligned copy of the on-disk page */
+	BufferTag	buf_tag;		/* identity of requested block */
+	uint32		buf_hash;		/* hash value for buf_tag */
+	LWLock	   *partLock;		/* buffer partition lock for the buffer */
+	BufferDesc *bufdesc;
+	int			buf_id;
+
+	Assert(smgrexists(smgr, forknum));
+
+	/* create a tag so we can look after the buffer */
+	INIT_BUFFERTAG(buf_tag, smgr->smgr_rnode.node, forknum, blkno);
+
+	/* determine its hash code and partition lock ID */
+	buf_hash = BufTableHashCode(&buf_tag);
+	partLock = BufMappingPartitionLock(buf_hash);
+
+	/* see if the block is in the buffer pool or not */
+	LWLockAcquire(partLock, LW_SHARED);
+	buf_id = BufTableLookup(&buf_tag, buf_hash);
+	if (buf_id >= 0)
+	{
+		uint32		buf_state;
+
+		/*
+		 * Found it.  Now, retrieve its state to know what to do with it.
+		 * The buffer is never pinned here: the header spinlock is taken
+		 * only for the time needed to get a consistent copy of the state
+		 * flags, to limit overhead as much as possible.  We keep the shared
+		 * LWLock on the target buffer mapping partition for now, so this
+		 * buffer cannot be evicted, and we acquire an I/O lock on the
+		 * buffer as we may need to read its contents from disk.
+		 */
+		bufdesc = GetBufferDescriptor(buf_id);
+
+		LWLockAcquire(BufferDescriptorGetIOLock(bufdesc), LW_SHARED);
+		buf_state = LockBufHdr(bufdesc);
+		UnlockBufHdr(bufdesc, buf_state);
+
+		/* If the page is dirty or invalid, skip it */
+		if ((buf_state & BM_DIRTY) != 0 || (buf_state & BM_TAG_VALID) == 0)
+		{
+			/* release in reverse order of acquisition */
+			LWLockRelease(BufferDescriptorGetIOLock(bufdesc));
+			LWLockRelease(partLock);
+			return true;
+		}
+
+		/* Read the buffer from disk, with the I/O lock still held */
+		smgrread(smgr, forknum, blkno, buffer.data);
+		LWLockRelease(BufferDescriptorGetIOLock(bufdesc));
+	}
+	else
+	{
+		/*
+		 * Simply read the buffer.  There's no risk of modification on it as
+		 * we are holding the buffer pool partition mapping lock.
+		 */
+		smgrread(smgr, forknum, blkno, buffer.data);
+	}
+
+	/* buffer lookup and read done, so release the lock and do the check */
+	LWLockRelease(partLock);
+
+	return PageIsVerifiedExtended((Page) buffer.data, blkno, PIV_REPORT_STAT);
+}