@@ -127,6 +127,9 @@ static void bt_check_every_level(Relation rel, Relation heaprel,
127
127
bool readonly ,bool heapallindexed );
128
128
static BtreeLevel bt_check_level_from_leftmost (BtreeCheckState * state ,
129
129
BtreeLevel level );
130
+ static bool bt_leftmost_ignoring_half_dead (BtreeCheckState * state ,
131
+ BlockNumber start ,
132
+ BTPageOpaque start_opaque );
130
133
static void bt_target_page_check (BtreeCheckState * state );
131
134
static ScanKey bt_right_page_check_scankey (BtreeCheckState * state );
132
135
static void bt_downlink_check (BtreeCheckState * state ,BlockNumber childblock ,
@@ -716,7 +719,7 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
716
719
*/
717
720
if (state -> readonly )
718
721
{
719
- if (!P_LEFTMOST ( opaque ))
722
+ if (!bt_leftmost_ignoring_half_dead ( state , current , opaque ))
720
723
ereport (ERROR ,
721
724
(errcode (ERRCODE_INDEX_CORRUPTED ),
722
725
errmsg ("block %u is not leftmost in index \"%s\"" ,
@@ -769,10 +772,14 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
769
772
}
770
773
771
774
/*
772
- * readonly mode can only ever land on live pages and half-dead pages,
773
- * so sibling pointers should always be in mutual agreement
775
+ * Sibling links should be in mutual agreement. There arises
776
+ * leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
777
+ * of the parent's low-key downlink is half-dead. (A half-dead page
778
+ * has no downlink from its parent.) Under heavyweight locking, the
779
+ * last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
774
780
*/
775
- if (state -> readonly && opaque -> btpo_prev != leftcurrent )
781
+ if (state -> readonly &&
782
+ opaque -> btpo_prev != leftcurrent && leftcurrent != P_NONE )
776
783
ereport (ERROR ,
777
784
(errcode (ERRCODE_INDEX_CORRUPTED ),
778
785
errmsg ("left link/right link pair in index \"%s\" not in agreement" ,
@@ -822,6 +829,67 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
822
829
return nextleveldown ;
823
830
}
824
831
832
+ /*
833
+ * Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
834
+ * half-dead, sibling-linked pages to the left. If a half-dead page appears
835
+ * under state->readonly, the database exited recovery between the first-stage
836
+ * and second-stage WAL records of a deletion.
837
+ */
838
+ static bool
839
+ bt_leftmost_ignoring_half_dead (BtreeCheckState * state ,
840
+ BlockNumber start ,
841
+ BTPageOpaque start_opaque )
842
+ {
843
+ BlockNumber reached = start_opaque -> btpo_prev ,
844
+ reached_from = start ;
845
+ bool all_half_dead = true;
846
+
847
+ /*
848
+ * To handle the !readonly case, we'd need to accept BTP_DELETED pages and
849
+ * potentially observe nbtree/README "Page deletion and backwards scans".
850
+ */
851
+ Assert (state -> readonly );
852
+
853
+ while (reached != P_NONE && all_half_dead )
854
+ {
855
+ Page page = palloc_btree_page (state ,reached );
856
+ BTPageOpaque reached_opaque = (BTPageOpaque )PageGetSpecialPointer (page );
857
+
858
+ CHECK_FOR_INTERRUPTS ();
859
+
860
+ /*
861
+ * Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
862
+ * writes that side-links will continue to point to the siblings.
863
+ * Check btpo_next for that property.
864
+ */
865
+ all_half_dead = P_ISHALFDEAD (reached_opaque )&&
866
+ reached != start &&
867
+ reached != reached_from &&
868
+ reached_opaque -> btpo_next == reached_from ;
869
+ if (all_half_dead )
870
+ {
871
+ XLogRecPtr pagelsn = PageGetLSN (page );
872
+
873
+ /* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
874
+ ereport (DEBUG1 ,
875
+ (errcode (ERRCODE_NO_DATA ),
876
+ errmsg_internal ("harmless interrupted page deletion detected in index \"%s\"" ,
877
+ RelationGetRelationName (state -> rel )),
878
+ errdetail_internal ("Block=%u right block=%u page lsn=%X/%X." ,
879
+ reached ,reached_from ,
880
+ (uint32 ) (pagelsn >>32 ),
881
+ (uint32 )pagelsn )));
882
+
883
+ reached_from = reached ;
884
+ reached = reached_opaque -> btpo_prev ;
885
+ }
886
+
887
+ pfree (page );
888
+ }
889
+
890
+ return all_half_dead ;
891
+ }
892
+
825
893
/*
826
894
* Function performs the following checks on target page, or pages ancillary to
827
895
* target page: