77 * Copyright (c) 2001-2006, PostgreSQL Global Development Group
88 *
99 * IDENTIFICATION
10- * $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.17 2006/06/07 18:49:03 tgl Exp $
10+ * $PostgreSQL: pgsql/src/backend/executor/instrument.c,v 1.18 2006/06/09 19:30:56 tgl Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
1414#include "postgres.h"
1515
16- #include <math.h>
1716#include <unistd.h>
1817
1918#include "executor/instrument.h"
2019
2120
22- /*
23- * As of PostgreSQL 8.2, we try to reduce the overhead of EXPLAIN ANALYZE
24- * by not calling INSTR_TIME_SET_CURRENT() for every single node execution.
25- * (Because that requires a kernel call on most systems, it's expensive.)
26- *
27- * This macro determines the sampling interval: that is, after how many more
28- * iterations will we take the next time sample, given that niters iterations
29- * have occurred already. In general, if the function is f(x), then for N
30- * iterations we will take on the order of integral(1/f(x), x=0..N)
31- * samples. Some examples follow, with the number of samples that would be
32- * collected over 1,000,000 iterations:
33- *
34- *f(x) = x => log2(N) 20
35- *f(x) = x^(1/2) => 2 * N^(1/2) 2000
36- *f(x) = x^(1/3) => 1.5 * N^(2/3) 15000
37- *
38- * I've chosen the last one as it seems to provide a good compromise between
39- * low overhead but still getting a meaningful number of samples. However,
40- * not all machines have the cbrt() function so on those we substitute
41- * sqrt(). The difference is not very significant in the tests I made.
42- *
43- * The actual sampling interval is randomized with the SampleFunc() value
44- * as the mean; this hopefully will reduce any measurement bias due to
45- * variation in the node execution time.
46- */
47- #ifdef HAVE_CBRT
48- #define SampleFunc (niters ) cbrt(niters)
49- #else
50- #define SampleFunc (niters ) sqrt(niters)
51- #endif
52-
53- #define SampleInterval (niters ) \
54- (SampleFunc(niters) * (double) random() / (double) (MAX_RANDOM_VALUE/2))
55-
56- /*
57- * We sample at every node iteration until we've reached this threshold,
58- * so that nodes not called a large number of times are completely accurate.
59- * (Perhaps this should be a GUC variable?)
60- */
61- #define SAMPLE_THRESHOLD 50
62-
63- /*
64- * Determine the sampling overhead. This only needs to be done once per
65- * backend (actually, probably once per postmaster would do ...)
66- */
67- static double SampleOverhead ;
68- static bool SampleOverheadCalculated = false;
69-
70- static void
71- CalculateSampleOverhead (void )
72- {
73- int i ;
74-
75- /*
76- * We could get a wrong result due to being interrupted by task switch.
77- * To minimize the risk we do it a few times and take the lowest.
78- */
79- SampleOverhead = 1.0e6 ;
80-
81- for (i = 0 ;i < 5 ;i ++ )
82- {
83- Instrumentation timer ;
84- instr_time tmptime ;
85- int j ;
86- double overhead ;
87-
88- memset (& timer ,0 ,sizeof (timer ));
89- InstrStartNode (& timer );
90- #define TEST_COUNT 100
91- for (j = 0 ;j < TEST_COUNT ;j ++ )
92- {
93- INSTR_TIME_SET_CURRENT (tmptime );
94- }
95- InstrStopNode (& timer ,1 );
96- overhead = INSTR_TIME_GET_DOUBLE (timer .counter ) /TEST_COUNT ;
97- if (overhead < SampleOverhead )
98- SampleOverhead = overhead ;
99- }
100-
101- SampleOverheadCalculated = true;
102- }
103-
104-
10521/* Allocate new instrumentation structure(s) */
10622Instrumentation *
10723InstrAlloc (int n )
10824{
109- Instrumentation * instr ;
110-
111- /* Calculate sampling overhead, if not done yet in this backend */
112- if (!SampleOverheadCalculated )
113- CalculateSampleOverhead ();
114-
115- instr = palloc0 (n * sizeof (Instrumentation ));
25+ Instrumentation * instr = palloc0 (n * sizeof (Instrumentation ));
11626
11727/* we don't need to do any initialization except zero 'em */
11828
12434InstrStartNode (Instrumentation * instr )
12535{
12636if (INSTR_TIME_IS_ZERO (instr -> starttime ))
127- {
128- /*
129- * Always sample if not yet up to threshold, else check whether
130- * next threshold has been reached
131- */
132- if (instr -> itercount < SAMPLE_THRESHOLD )
133- instr -> sampling = true;
134- else if (instr -> itercount >=instr -> nextsample )
135- {
136- instr -> sampling = true;
137- instr -> nextsample =
138- instr -> itercount + SampleInterval (instr -> itercount );
139- }
140- if (instr -> sampling )
141- INSTR_TIME_SET_CURRENT (instr -> starttime );
142- }
37+ INSTR_TIME_SET_CURRENT (instr -> starttime );
14338else
14439elog (DEBUG2 ,"InstrStartNode called twice in a row" );
14540}
@@ -148,53 +43,39 @@ InstrStartNode(Instrumentation *instr)
14843void
14944InstrStopNode (Instrumentation * instr ,double nTuples )
15045{
151- /* count the returned tuples and iterations */
46+ instr_time endtime ;
47+
48+ /* count the returned tuples */
15249instr -> tuplecount += nTuples ;
153- instr -> itercount += 1 ;
15450
155- /* measure runtime if appropriate */
156- if (instr -> sampling )
51+ if (INSTR_TIME_IS_ZERO (instr -> starttime ))
15752{
158- instr_time endtime ;
159-
160- /*
161- * To be sure that SampleOverhead accurately reflects the extra
162- * overhead, we must do INSTR_TIME_SET_CURRENT() as the *first*
163- * action that is different between the sampling and non-sampling
164- * code paths.
165- */
166- INSTR_TIME_SET_CURRENT (endtime );
53+ elog (DEBUG2 ,"InstrStopNode called without start" );
54+ return ;
55+ }
16756
168- if (INSTR_TIME_IS_ZERO (instr -> starttime ))
169- {
170- elog (DEBUG2 ,"InstrStopNode called without start" );
171- return ;
172- }
57+ INSTR_TIME_SET_CURRENT (endtime );
17358
17459#ifndef WIN32
175- instr -> counter .tv_sec += endtime .tv_sec - instr -> starttime .tv_sec ;
176- instr -> counter .tv_usec += endtime .tv_usec - instr -> starttime .tv_usec ;
60+ instr -> counter .tv_sec += endtime .tv_sec - instr -> starttime .tv_sec ;
61+ instr -> counter .tv_usec += endtime .tv_usec - instr -> starttime .tv_usec ;
17762
178- /* Normalize after each add to avoid overflow/underflow of tv_usec */
179- while (instr -> counter .tv_usec < 0 )
180- {
181- instr -> counter .tv_usec += 1000000 ;
182- instr -> counter .tv_sec -- ;
183- }
184- while (instr -> counter .tv_usec >=1000000 )
185- {
186- instr -> counter .tv_usec -= 1000000 ;
187- instr -> counter .tv_sec ++ ;
188- }
63+ /* Normalize after each add to avoid overflow/underflow of tv_usec */
64+ while (instr -> counter .tv_usec < 0 )
65+ {
66+ instr -> counter .tv_usec += 1000000 ;
67+ instr -> counter .tv_sec -- ;
68+ }
69+ while (instr -> counter .tv_usec >=1000000 )
70+ {
71+ instr -> counter .tv_usec -= 1000000 ;
72+ instr -> counter .tv_sec ++ ;
73+ }
18974#else /* WIN32 */
190- instr -> counter .QuadPart += (endtime .QuadPart - instr -> starttime .QuadPart );
75+ instr -> counter .QuadPart += (endtime .QuadPart - instr -> starttime .QuadPart );
19176#endif
19277
193- INSTR_TIME_SET_ZERO (instr -> starttime );
194-
195- instr -> samplecount += 1 ;
196- instr -> sampling = false;
197- }
78+ INSTR_TIME_SET_ZERO (instr -> starttime );
19879
19980/* Is this the first tuple of this cycle? */
20081if (!instr -> running )
@@ -217,44 +98,18 @@ InstrEndLoop(Instrumentation *instr)
21798if (!INSTR_TIME_IS_ZERO (instr -> starttime ))
21899elog (DEBUG2 ,"InstrEndLoop called on running node" );
219100
220- /*Compute time spent in node */
101+ /*Accumulate per-cycle statistics into totals */
221102totaltime = INSTR_TIME_GET_DOUBLE (instr -> counter );
222103
223- /*
224- * If we didn't measure runtime on every iteration, then we must increase
225- * the measured total to account for the other iterations. Naively
226- * multiplying totaltime by itercount/samplecount would be wrong because
227- * it effectively assumes the sampling overhead applies to all iterations,
228- * even the ones we didn't measure. (Note that what we are trying to
229- * estimate here is the actual time spent in the node, including the
230- * actual measurement overhead; not the time exclusive of measurement
231- * overhead.) We exclude the first iteration from the correction basis,
232- * because it often takes longer than others.
233- */
234- if (instr -> itercount > instr -> samplecount )
235- {
236- double per_iter ;
237-
238- per_iter = (totaltime - instr -> firsttuple ) / (instr -> samplecount - 1 )
239- - SampleOverhead ;
240- if (per_iter > 0 )/* sanity check */
241- totaltime += per_iter * (instr -> itercount - instr -> samplecount );
242- }
243-
244- /* Accumulate per-cycle statistics into totals */
245104instr -> startup += instr -> firsttuple ;
246105instr -> total += totaltime ;
247106instr -> ntuples += instr -> tuplecount ;
248107instr -> nloops += 1 ;
249108
250109/* Reset for next cycle (if any) */
251110instr -> running = false;
252- instr -> sampling = false;
253111INSTR_TIME_SET_ZERO (instr -> starttime );
254112INSTR_TIME_SET_ZERO (instr -> counter );
255113instr -> firsttuple = 0 ;
256114instr -> tuplecount = 0 ;
257- instr -> itercount = 0 ;
258- instr -> samplecount = 0 ;
259- instr -> nextsample = 0 ;
260115}