1313 *
1414 *Copyright (c) 2001-2006, PostgreSQL Global Development Group
1515 *
16- *$PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.121 2006/03/05 15:58:36 momjian Exp $
16+ *$PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.122 2006/04/06 20:38:00 tgl Exp $
1717 * ----------
1818 */
1919#include "postgres.h"
6969#define PGSTAT_STAT_INTERVAL 500/* How often to write the status file;
7070 * in milliseconds. */
7171
72- #define PGSTAT_DESTROY_DELAY 10000/* How long to keep destroyed objects
73- * known, to give delayed UDP packets
74- * time to arrive; in milliseconds. */
75-
76- #define PGSTAT_DESTROY_COUNT (PGSTAT_DESTROY_DELAY / PGSTAT_STAT_INTERVAL)
77-
7872#define PGSTAT_RESTART_INTERVAL 60/* How often to attempt to restart a
7973 * failed statistics collector; in
8074 * seconds. */
@@ -141,7 +135,6 @@ static intpgStatXactRollback = 0;
141135
142136static TransactionId pgStatDBHashXact = InvalidTransactionId ;
143137static HTAB * pgStatDBHash = NULL ;
144- static HTAB * pgStatBeDead = NULL ;
145138static PgStat_StatBeEntry * pgStatBeTable = NULL ;
146139static int pgStatNumBackends = 0 ;
147140
@@ -1552,7 +1545,6 @@ PgstatCollectorMain(int argc, char *argv[])
15521545int readPipe ;
15531546int len = 0 ;
15541547struct itimerval timeout ;
1555- HASHCTL hash_ctl ;
15561548bool need_timer = false;
15571549
15581550MyProcPid = getpid ();/* reset MyProcPid */
@@ -1614,16 +1606,6 @@ PgstatCollectorMain(int argc, char *argv[])
16141606pgStatRunningInCollector = true;
16151607pgstat_read_statsfile (& pgStatDBHash ,InvalidOid ,NULL ,NULL );
16161608
1617- /*
1618- * Create the dead backend hashtable
1619- */
1620- memset (& hash_ctl ,0 ,sizeof (hash_ctl ));
1621- hash_ctl .keysize = sizeof (int );
1622- hash_ctl .entrysize = sizeof (PgStat_StatBeDead );
1623- hash_ctl .hash = tag_hash ;
1624- pgStatBeDead = hash_create ("Dead Backends" ,PGSTAT_BE_HASH_SIZE ,
1625- & hash_ctl ,HASH_ELEM |HASH_FUNCTION );
1626-
16271609/*
16281610 * Create the known backends table
16291611 */
@@ -2085,7 +2067,6 @@ static int
20852067pgstat_add_backend (PgStat_MsgHdr * msg )
20862068{
20872069PgStat_StatBeEntry * beentry ;
2088- PgStat_StatBeDead * deadbe ;
20892070
20902071/*
20912072 * Check that the backend ID is valid
@@ -2111,32 +2092,13 @@ pgstat_add_backend(PgStat_MsgHdr *msg)
21112092if (beentry -> procpid > 0 && beentry -> procpid == msg -> m_procpid )
21122093return 0 ;
21132094
2114- /*
2115- * Lookup if this backend is known to be dead. This can be caused due to
2116- * messages arriving in the wrong order - e.g. postmaster's BETERM message
2117- * might have arrived before we received all the backends stats messages,
2118- * or even a new backend with the same backendid was faster in sending his
2119- * BESTART.
2120- *
2121- * If the backend is known to be dead, we ignore this add.
2122- */
2123- deadbe = (PgStat_StatBeDead * )hash_search (pgStatBeDead ,
2124- (void * )& (msg -> m_procpid ),
2125- HASH_FIND ,NULL );
2126- if (deadbe )
2127- return 1 ;
2128-
2129- /*
2130- * Backend isn't known to be dead. If it's slot is currently used, we have
2131- * to kick out the old backend.
2132- */
2133- if (beentry -> procpid > 0 )
2134- pgstat_sub_backend (beentry -> procpid );
2135-
21362095/* Must be able to distinguish between empty and non-empty slots */
21372096Assert (msg -> m_procpid > 0 );
21382097
2139- /* Put this new backend into the slot */
2098+ /*
2099+ * Put this new backend into the slot (possibly overwriting an old entry,
2100+ * if we missed its BETERM or the BETERM hasn't arrived yet).
2101+ */
21402102beentry -> procpid = msg -> m_procpid ;
21412103beentry -> start_timestamp = GetCurrentTimestamp ();
21422104beentry -> activity_start_timestamp = 0 ;
@@ -2185,7 +2147,6 @@ pgstat_get_db_entry(Oid databaseid, bool create)
21852147result -> n_xact_rollback = 0 ;
21862148result -> n_blocks_fetched = 0 ;
21872149result -> n_blocks_hit = 0 ;
2188- result -> destroy = 0 ;
21892150result -> last_autovac_time = 0 ;
21902151
21912152memset (& hash_ctl ,0 ,sizeof (hash_ctl ));
@@ -2211,8 +2172,6 @@ static void
22112172pgstat_sub_backend (int procpid )
22122173{
22132174int i ;
2214- PgStat_StatBeDead * deadbe ;
2215- bool found ;
22162175
22172176/*
22182177 * Search in the known-backends table for the slot containing this PID.
@@ -2222,28 +2181,7 @@ pgstat_sub_backend(int procpid)
22222181if (pgStatBeTable [i ].procpid == procpid )
22232182{
22242183/*
2225- * That's him. Add an entry to the known to be dead backends. Due
2226- * to possible misorder in the arrival of UDP packets it's
2227- * possible that even if we know the backend is dead, there could
2228- * still be messages queued that arrive later. Those messages must
2229- * not cause our number of backends statistics to get screwed up,
2230- * so we remember for a couple of seconds that this PID is dead
2231- * and ignore them (only the counting of backends, not the table
2232- * access stats they sent).
2233- */
2234- deadbe = (PgStat_StatBeDead * )hash_search (pgStatBeDead ,
2235- (void * )& procpid ,
2236- HASH_ENTER ,
2237- & found );
2238-
2239- if (!found )
2240- {
2241- deadbe -> backendid = i + 1 ;
2242- deadbe -> destroy = PGSTAT_DESTROY_COUNT ;
2243- }
2244-
2245- /*
2246- * Declare the backend slot empty.
2184+ * That's him. Mark the backend slot empty.
22472185 */
22482186pgStatBeTable [i ].procpid = 0 ;
22492187return ;
@@ -2270,7 +2208,6 @@ pgstat_write_statsfile(void)
22702208HASH_SEQ_STATUS tstat ;
22712209PgStat_StatDBEntry * dbentry ;
22722210PgStat_StatTabEntry * tabentry ;
2273- PgStat_StatBeDead * deadbe ;
22742211FILE * fpout ;
22752212int i ;
22762213int32 format_id ;
@@ -2300,31 +2237,6 @@ pgstat_write_statsfile(void)
23002237hash_seq_init (& hstat ,pgStatDBHash );
23012238while ((dbentry = (PgStat_StatDBEntry * )hash_seq_search (& hstat ))!= NULL )
23022239{
2303- /*
2304- * If this database is marked destroyed, count down and do so if it
2305- * reaches 0.
2306- */
2307- if (dbentry -> destroy > 0 )
2308- {
2309- if (-- (dbentry -> destroy )== 0 )
2310- {
2311- if (dbentry -> tables != NULL )
2312- hash_destroy (dbentry -> tables );
2313-
2314- if (hash_search (pgStatDBHash ,
2315- (void * )& (dbentry -> databaseid ),
2316- HASH_REMOVE ,NULL )== NULL )
2317- ereport (ERROR ,
2318- (errmsg ("database hash table corrupted "
2319- "during cleanup --- abort" )));
2320- }
2321-
2322- /*
2323- * Don't include statistics for it.
2324- */
2325- continue ;
2326- }
2327-
23282240/*
23292241 * Write out the DB entry including the number of live backends.
23302242 * We don't write the tables pointer since it's of no use to any
@@ -2339,30 +2251,6 @@ pgstat_write_statsfile(void)
23392251hash_seq_init (& tstat ,dbentry -> tables );
23402252while ((tabentry = (PgStat_StatTabEntry * )hash_seq_search (& tstat ))!= NULL )
23412253{
2342- /*
2343- * If table entry marked for destruction, same as above for the
2344- * database entry.
2345- */
2346- if (tabentry -> destroy > 0 )
2347- {
2348- if (-- (tabentry -> destroy )== 0 )
2349- {
2350- if (hash_search (dbentry -> tables ,
2351- (void * )& (tabentry -> tableid ),
2352- HASH_REMOVE ,NULL )== NULL )
2353- ereport (ERROR ,
2354- (errmsg ("tables hash table for "
2355- "database %u corrupted during "
2356- "cleanup --- abort" ,
2357- dbentry -> databaseid )));
2358- }
2359- continue ;
2360- }
2361-
2362- /*
2363- * At least we think this is still a live table. Emit its access
2364- * stats.
2365- */
23662254fputc ('T' ,fpout );
23672255fwrite (tabentry ,sizeof (PgStat_StatTabEntry ),1 ,fpout );
23682256}
@@ -2428,26 +2316,6 @@ pgstat_write_statsfile(void)
24282316PGSTAT_STAT_TMPFILE ,PGSTAT_STAT_FILENAME )));
24292317unlink (PGSTAT_STAT_TMPFILE );
24302318}
2431-
2432- /*
2433- * Clear out the dead backends table
2434- */
2435- hash_seq_init (& hstat ,pgStatBeDead );
2436- while ((deadbe = (PgStat_StatBeDead * )hash_seq_search (& hstat ))!= NULL )
2437- {
2438- /*
2439- * Count down the destroy delay and remove entries where it reaches 0.
2440- */
2441- if (-- (deadbe -> destroy ) <=0 )
2442- {
2443- if (hash_search (pgStatBeDead ,
2444- (void * )& (deadbe -> procpid ),
2445- HASH_REMOVE ,NULL )== NULL )
2446- ereport (ERROR ,
2447- (errmsg ("dead-server-process hash table corrupted "
2448- "during cleanup --- abort" )));
2449- }
2450- }
24512319}
24522320
24532321/*
@@ -2595,7 +2463,6 @@ pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
25952463
25962464memcpy (dbentry ,& dbbuf ,sizeof (PgStat_StatDBEntry ));
25972465dbentry -> tables = NULL ;
2598- dbentry -> destroy = 0 ;
25992466dbentry -> n_backends = 0 ;
26002467
26012468/*
@@ -3005,12 +2872,8 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
30052872dbentry = pgstat_get_db_entry (msg -> m_databaseid , true);
30062873
30072874/*
3008- * If the database is marked for destroy, this is a delayed UDP packet and
3009- * not worth being counted.
2875+ * Update database-wide stats.
30102876 */
3011- if (dbentry -> destroy > 0 )
3012- return ;
3013-
30142877dbentry -> n_xact_commit += (PgStat_Counter ) (msg -> m_xact_commit );
30152878dbentry -> n_xact_rollback += (PgStat_Counter ) (msg -> m_xact_rollback );
30162879
@@ -3043,8 +2906,6 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
30432906
30442907tabentry -> blocks_fetched = tabmsg [i ].t_blocks_fetched ;
30452908tabentry -> blocks_hit = tabmsg [i ].t_blocks_hit ;
3046-
3047- tabentry -> destroy = 0 ;
30482909}
30492910else
30502911{
@@ -3085,7 +2946,6 @@ static void
30852946pgstat_recv_tabpurge (PgStat_MsgTabpurge * msg ,int len )
30862947{
30872948PgStat_StatDBEntry * dbentry ;
3088- PgStat_StatTabEntry * tabentry ;
30892949int i ;
30902950
30912951/*
@@ -3102,23 +2962,15 @@ pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
31022962if (!dbentry || !dbentry -> tables )
31032963return ;
31042964
3105- /*
3106- * If the database is marked for destroy, this is a delayed UDP packet and
3107- * the tables will go away at DB destruction.
3108- */
3109- if (dbentry -> destroy > 0 )
3110- return ;
3111-
31122965/*
31132966 * Process all table entries in the message.
31142967 */
31152968for (i = 0 ;i < msg -> m_nentries ;i ++ )
31162969{
3117- tabentry = (PgStat_StatTabEntry * )hash_search (dbentry -> tables ,
3118- (void * )& (msg -> m_tableid [i ]),
3119- HASH_FIND ,NULL );
3120- if (tabentry )
3121- tabentry -> destroy = PGSTAT_DESTROY_COUNT ;
2970+ /* Remove from hashtable if present; we don't care if it's not. */
2971+ (void )hash_search (dbentry -> tables ,
2972+ (void * )& (msg -> m_tableid [i ]),
2973+ HASH_REMOVE ,NULL );
31222974}
31232975}
31242976
@@ -3146,10 +2998,20 @@ pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
31462998dbentry = pgstat_get_db_entry (msg -> m_databaseid , false);
31472999
31483000/*
3149- *Mark the database for destruction .
3001+ *If found, remove it .
31503002 */
31513003if (dbentry )
3152- dbentry -> destroy = PGSTAT_DESTROY_COUNT ;
3004+ {
3005+ if (dbentry -> tables != NULL )
3006+ hash_destroy (dbentry -> tables );
3007+
3008+ if (hash_search (pgStatDBHash ,
3009+ (void * )& (dbentry -> databaseid ),
3010+ HASH_REMOVE ,NULL )== NULL )
3011+ ereport (ERROR ,
3012+ (errmsg ("database hash table corrupted "
3013+ "during cleanup --- abort" )));
3014+ }
31533015}
31543016
31553017
@@ -3191,7 +3053,6 @@ pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
31913053dbentry -> n_xact_rollback = 0 ;
31923054dbentry -> n_blocks_fetched = 0 ;
31933055dbentry -> n_blocks_hit = 0 ;
3194- dbentry -> destroy = 0 ;
31953056
31963057memset (& hash_ctl ,0 ,sizeof (hash_ctl ));
31973058hash_ctl .keysize = sizeof (Oid );