Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb9ed856

Browse files
committed
Revert "Fix race in Parallel Hash Join batch cleanup."
This reverts commit8fa2478.Discussion:https://postgr.es/m/CA%2BhUKGJmcqAE3MZeDCLLXa62cWM0AJbKmp2JrJYaJ86bz36LFA%40mail.gmail.com
1 parent8fa2478 commitb9ed856

File tree

3 files changed

+32
-58
lines changed

3 files changed

+32
-58
lines changed

‎src/backend/executor/nodeHash.c

Lines changed: 15 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -333,21 +333,14 @@ MultiExecParallelHash(HashState *node)
333333
hashtable->nbuckets=pstate->nbuckets;
334334
hashtable->log2_nbuckets=my_log2(hashtable->nbuckets);
335335
hashtable->totalTuples=pstate->total_tuples;
336-
337-
/*
338-
* Unless we're completely done and the batch state has been freed, make
339-
* sure we have accessors.
340-
*/
341-
if (BarrierPhase(build_barrier)<PHJ_BUILD_DONE)
342-
ExecParallelHashEnsureBatchAccessors(hashtable);
336+
ExecParallelHashEnsureBatchAccessors(hashtable);
343337

344338
/*
345339
* The next synchronization point is in ExecHashJoin's HJ_BUILD_HASHTABLE
346-
* case, which will bring the build phase toPHJ_BUILD_RUNNING (if it isn't
340+
* case, which will bring the build phase toPHJ_BUILD_DONE (if it isn't
347341
* there already).
348342
*/
349343
Assert(BarrierPhase(build_barrier)==PHJ_BUILD_HASHING_OUTER||
350-
BarrierPhase(build_barrier)==PHJ_BUILD_RUNNING||
351344
BarrierPhase(build_barrier)==PHJ_BUILD_DONE);
352345
}
353346

@@ -631,7 +624,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
631624
/*
632625
* The next Parallel Hash synchronization point is in
633626
* MultiExecParallelHash(), which will progress it all the way to
634-
*PHJ_BUILD_RUNNING. The caller must not return control from this
627+
*PHJ_BUILD_DONE. The caller must not return control from this
635628
* executor node between now and then.
636629
*/
637630
}
@@ -3026,11 +3019,14 @@ ExecParallelHashEnsureBatchAccessors(HashJoinTable hashtable)
30263019
}
30273020

30283021
/*
3029-
* We should never see a state where the batch-tracking array is freed,
3030-
* because we should have given up sooner if we join when the build barrier
3031-
* has reached the PHJ_BUILD_DONE phase.
3022+
* It's possible for a backend to start up very late so that the whole
3023+
* join is finished and the shm state for tracking batches has already
3024+
* been freed by ExecHashTableDetach(). In that case we'll just leave
3025+
* hashtable->batches as NULL so that ExecParallelHashJoinNewBatch() gives
3026+
* up early.
30323027
*/
3033-
Assert(DsaPointerIsValid(pstate->batches));
3028+
if (!DsaPointerIsValid(pstate->batches))
3029+
return;
30343030

30353031
/* Use hash join memory context. */
30363032
oldcxt=MemoryContextSwitchTo(hashtable->hashCxt);
@@ -3150,17 +3146,9 @@ ExecHashTableDetachBatch(HashJoinTable hashtable)
31503146
void
31513147
ExecHashTableDetach(HashJoinTablehashtable)
31523148
{
3153-
ParallelHashJoinState*pstate=hashtable->parallel_state;
3154-
3155-
/*
3156-
* If we're involved in a parallel query, we must either have got all the
3157-
* way to PHJ_BUILD_RUNNING, or joined too late and be in PHJ_BUILD_DONE.
3158-
*/
3159-
Assert(!pstate||
3160-
BarrierPhase(&pstate->build_barrier) >=PHJ_BUILD_RUNNING);
3161-
3162-
if (pstate&&BarrierPhase(&pstate->build_barrier)==PHJ_BUILD_RUNNING)
3149+
if (hashtable->parallel_state)
31633150
{
3151+
ParallelHashJoinState*pstate=hashtable->parallel_state;
31643152
inti;
31653153

31663154
/* Make sure any temporary files are closed. */
@@ -3176,22 +3164,17 @@ ExecHashTableDetach(HashJoinTable hashtable)
31763164
}
31773165

31783166
/* If we're last to detach, clean up shared memory. */
3179-
if (BarrierArriveAndDetach(&pstate->build_barrier))
3167+
if (BarrierDetach(&pstate->build_barrier))
31803168
{
3181-
/*
3182-
* Late joining processes will see this state and give up
3183-
* immediately.
3184-
*/
3185-
Assert(BarrierPhase(&pstate->build_barrier)==PHJ_BUILD_DONE);
3186-
31873169
if (DsaPointerIsValid(pstate->batches))
31883170
{
31893171
dsa_free(hashtable->area,pstate->batches);
31903172
pstate->batches=InvalidDsaPointer;
31913173
}
31923174
}
3175+
3176+
hashtable->parallel_state=NULL;
31933177
}
3194-
hashtable->parallel_state=NULL;
31953178
}
31963179

31973180
/*

‎src/backend/executor/nodeHashjoin.c

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,7 @@
4545
* PHJ_BUILD_ALLOCATING -- one sets up the batches and table 0
4646
* PHJ_BUILD_HASHING_INNER -- all hash the inner rel
4747
* PHJ_BUILD_HASHING_OUTER -- (multi-batch only) all hash the outer
48-
* PHJ_BUILD_RUNNING -- building done, probing can begin
49-
* PHJ_BUILD_DONE -- all work complete, one frees batches
48+
* PHJ_BUILD_DONE -- building done, probing can begin
5049
*
5150
* While in the phase PHJ_BUILD_HASHING_INNER a separate pair of barriers may
5251
* be used repeatedly as required to coordinate expansions in the number of
@@ -74,7 +73,7 @@
7473
* batches whenever it encounters them while scanning and probing, which it
7574
* can do because it processes batches in serial order.
7675
*
77-
* OncePHJ_BUILD_RUNNING is reached, backends then split up and process
76+
* OncePHJ_BUILD_DONE is reached, backends then split up and process
7877
* different batches, or gang up and work together on probing batches if there
7978
* aren't enough to go around. For each batch there is a separate barrier
8079
* with the following phases:
@@ -96,16 +95,11 @@
9695
*
9796
* To avoid deadlocks, we never wait for any barrier unless it is known that
9897
* all other backends attached to it are actively executing the node or have
99-
* finished. Practically, that means that we never emit a tuple while attached
100-
* to a barrier, unless the barrier has reached a phase that means that no
101-
* process will wait on it again. We emit tuples while attached to the build
102-
* barrier in phase PHJ_BUILD_RUNNING, and to a per-batch barrier in phase
103-
* PHJ_BATCH_PROBING. These are advanced to PHJ_BUILD_DONE and PHJ_BATCH_DONE
104-
* respectively without waiting, using BarrierArriveAndDetach(). The last to
105-
* detach receives a different return value so that it knows that it's safe to
106-
* clean up. Any straggler process that attaches after that phase is reached
107-
* will see that it's too late to participate or access the relevant shared
108-
* memory objects.
98+
* already arrived. Practically, that means that we never return a tuple
99+
* while attached to a barrier, unless the barrier has reached its final
100+
* state. In the slightly special case of the per-batch barrier, we return
101+
* tuples while in PHJ_BATCH_PROBING phase, but that's OK because we use
102+
* BarrierArriveAndDetach() to advance it to PHJ_BATCH_DONE without waiting.
109103
*
110104
*-------------------------------------------------------------------------
111105
*/
@@ -323,7 +317,6 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
323317

324318
build_barrier=&parallel_state->build_barrier;
325319
Assert(BarrierPhase(build_barrier)==PHJ_BUILD_HASHING_OUTER||
326-
BarrierPhase(build_barrier)==PHJ_BUILD_RUNNING||
327320
BarrierPhase(build_barrier)==PHJ_BUILD_DONE);
328321
if (BarrierPhase(build_barrier)==PHJ_BUILD_HASHING_OUTER)
329322
{
@@ -336,18 +329,9 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
336329
BarrierArriveAndWait(build_barrier,
337330
WAIT_EVENT_HASH_BUILD_HASHING_OUTER);
338331
}
339-
elseif (BarrierPhase(build_barrier)==PHJ_BUILD_DONE)
340-
{
341-
/*
342-
* If we attached so late that the job is finished and
343-
* the batch state has been freed, we can return
344-
* immediately.
345-
*/
346-
returnNULL;
347-
}
332+
Assert(BarrierPhase(build_barrier)==PHJ_BUILD_DONE);
348333

349334
/* Each backend should now select a batch to work on. */
350-
Assert(BarrierPhase(build_barrier)==PHJ_BUILD_RUNNING);
351335
hashtable->curbatch=-1;
352336
node->hj_JoinState=HJ_NEED_NEW_BATCH;
353337

@@ -1106,6 +1090,14 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
11061090
intstart_batchno;
11071091
intbatchno;
11081092

1093+
/*
1094+
* If we started up so late that the batch tracking array has been freed
1095+
* already by ExecHashTableDetach(), then we are finished. See also
1096+
* ExecParallelHashEnsureBatchAccessors().
1097+
*/
1098+
if (hashtable->batches==NULL)
1099+
return false;
1100+
11091101
/*
11101102
* If we were already attached to a batch, remember not to bother checking
11111103
* it again, and detach from it (possibly freeing the hash table if we are

‎src/include/executor/hashjoin.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,7 @@ typedef struct ParallelHashJoinState
258258
#definePHJ_BUILD_ALLOCATING1
259259
#definePHJ_BUILD_HASHING_INNER2
260260
#definePHJ_BUILD_HASHING_OUTER3
261-
#definePHJ_BUILD_RUNNING4
262-
#definePHJ_BUILD_DONE5
261+
#definePHJ_BUILD_DONE4
263262

264263
/* The phases for probing each batch, used by for batch_barrier. */
265264
#definePHJ_BATCH_ELECTING0

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp