|
/*-------------------------------------------------------------------------
 *
 * shm_toc.c
 *	  shared memory segment table of contents
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/storage/ipc/shm_toc.c
 *
 *-------------------------------------------------------------------------
 */
| 13 | + |
| 14 | +#include"postgres.h" |
| 15 | + |
| 16 | +#include"storage/barrier.h" |
| 17 | +#include"storage/shm_toc.h" |
| 18 | +#include"storage/spin.h" |
| 19 | + |
| 20 | +typedefstructshm_toc_entry |
| 21 | +{ |
| 22 | +uint64key;/* Arbitrary identifier */ |
| 23 | +uint64offset;/* Bytes offset */ |
| 24 | +}shm_toc_entry; |
| 25 | + |
| 26 | +structshm_toc |
| 27 | +{ |
| 28 | +uint64toc_magic;/* Magic number for this TOC */ |
| 29 | +slock_ttoc_mutex;/* Spinlock for mutual exclusion */ |
| 30 | +Sizetoc_total_bytes;/* Bytes managed by this TOC */ |
| 31 | +Sizetoc_allocated_bytes;/* Bytes allocated of those managed */ |
| 32 | +Sizetoc_nentry;/* Number of entries in TOC */ |
| 33 | +shm_toc_entrytoc_entry[FLEXIBLE_ARRAY_MEMBER]; |
| 34 | +}; |
| 35 | + |
| 36 | +/* |
| 37 | + * Initialize a region of shared memory with a table of contents. |
| 38 | + */ |
| 39 | +shm_toc* |
| 40 | +shm_toc_create(uint64magic,void*address,Sizenbytes) |
| 41 | +{ |
| 42 | +shm_toc*toc= (shm_toc*)address; |
| 43 | + |
| 44 | +Assert(nbytes> offsetof(shm_toc,toc_entry)); |
| 45 | +toc->toc_magic=magic; |
| 46 | +SpinLockInit(&toc->toc_mutex); |
| 47 | +toc->toc_total_bytes=nbytes; |
| 48 | +toc->toc_allocated_bytes=0; |
| 49 | +toc->toc_nentry=0; |
| 50 | + |
| 51 | +returntoc; |
| 52 | +} |
| 53 | + |
| 54 | +/* |
| 55 | + * Attach to an existing table of contents. If the magic number found at |
| 56 | + * the target address doesn't match our expectations, returns NULL. |
| 57 | + */ |
| 58 | +externshm_toc* |
| 59 | +shm_toc_attach(uint64magic,void*address) |
| 60 | +{ |
| 61 | +shm_toc*toc= (shm_toc*)address; |
| 62 | + |
| 63 | +if (toc->toc_magic!=magic) |
| 64 | +returnNULL; |
| 65 | + |
| 66 | +Assert(toc->toc_total_bytes >=toc->toc_allocated_bytes); |
| 67 | +Assert(toc->toc_total_bytes >= offsetof(shm_toc,toc_entry)); |
| 68 | + |
| 69 | +returntoc; |
| 70 | +} |
| 71 | + |
| 72 | +/* |
| 73 | + * Allocate shared memory from a segment managed by a table of contents. |
| 74 | + * |
| 75 | + * This is not a full-blown allocator; there's no way to free memory. It's |
| 76 | + * just a way of dividing a single physical shared memory segment into logical |
| 77 | + * chunks that may be used for different purposes. |
| 78 | + * |
| 79 | + * We allocated backwards from the end of the segment, so that the TOC entries |
| 80 | + * can grow forward from the start of the segment. |
| 81 | + */ |
| 82 | +externvoid* |
| 83 | +shm_toc_allocate(shm_toc*toc,Sizenbytes) |
| 84 | +{ |
| 85 | +volatileshm_toc*vtoc=toc; |
| 86 | +Sizetotal_bytes; |
| 87 | +Sizeallocated_bytes; |
| 88 | +Sizenentry; |
| 89 | +Sizetoc_bytes; |
| 90 | + |
| 91 | +/* Make sure request is well-aligned. */ |
| 92 | +nbytes=BUFFERALIGN(nbytes); |
| 93 | + |
| 94 | +SpinLockAcquire(&toc->toc_mutex); |
| 95 | + |
| 96 | +total_bytes=vtoc->toc_total_bytes; |
| 97 | +allocated_bytes=vtoc->toc_allocated_bytes; |
| 98 | +nentry=vtoc->toc_nentry; |
| 99 | +toc_bytes= offsetof(shm_toc,toc_entry)+nentry*sizeof(shm_toc_entry) |
| 100 | ++allocated_bytes; |
| 101 | + |
| 102 | +/* Check for memory exhaustion and overflow. */ |
| 103 | +if (toc_bytes+nbytes>total_bytes||toc_bytes+nbytes<toc_bytes) |
| 104 | +{ |
| 105 | +SpinLockRelease(&toc->toc_mutex); |
| 106 | +ereport(ERROR, |
| 107 | +(errcode(ERRCODE_OUT_OF_MEMORY), |
| 108 | +errmsg("out of shared memory"))); |
| 109 | +} |
| 110 | +vtoc->toc_allocated_bytes+=nbytes; |
| 111 | + |
| 112 | +SpinLockRelease(&toc->toc_mutex); |
| 113 | + |
| 114 | +return ((char*)toc)+ (total_bytes-allocated_bytes-nbytes); |
| 115 | +} |
| 116 | + |
| 117 | +/* |
| 118 | + * Return the number of bytes that can still be allocated. |
| 119 | + */ |
| 120 | +externSize |
| 121 | +shm_toc_freespace(shm_toc*toc) |
| 122 | +{ |
| 123 | +volatileshm_toc*vtoc=toc; |
| 124 | +Sizetotal_bytes; |
| 125 | +Sizeallocated_bytes; |
| 126 | +Sizenentry; |
| 127 | +Sizetoc_bytes; |
| 128 | + |
| 129 | +SpinLockAcquire(&toc->toc_mutex); |
| 130 | +total_bytes=vtoc->toc_total_bytes; |
| 131 | +allocated_bytes=vtoc->toc_allocated_bytes; |
| 132 | +nentry=vtoc->toc_nentry; |
| 133 | +SpinLockRelease(&toc->toc_mutex); |
| 134 | + |
| 135 | +toc_bytes= offsetof(shm_toc,toc_entry)+nentry*sizeof(shm_toc_entry); |
| 136 | +Assert(allocated_bytes+BUFFERALIGN(toc_bytes) <=total_bytes); |
| 137 | +returntotal_bytes- (allocated_bytes+BUFFERALIGN(toc_bytes)); |
| 138 | +} |
| 139 | + |
| 140 | +/* |
| 141 | + * Insert a TOC entry. |
| 142 | + * |
| 143 | + * The idea here is that process setting up the shared memory segment will |
| 144 | + * register the addresses of data structures within the segment using this |
| 145 | + * function. Each data structure will be identified using a 64-bit key, which |
| 146 | + * is assumed to be a well-known or discoverable integer. Other processes |
| 147 | + * accessing the shared memory segment can pass the same key to |
| 148 | + * shm_toc_lookup() to discover the addresses of those data structures. |
| 149 | + * |
| 150 | + * Since the shared memory segment may be mapped at different addresses within |
| 151 | + * different backends, we store relative rather than absolute pointers. |
| 152 | + * |
| 153 | + * This won't scale well to a large number of keys. Hopefully, that isn't |
| 154 | + * necessary; if it proves to be, we might need to provide a more sophisticated |
| 155 | + * data structure here. But the real idea here is just to give someone mapping |
| 156 | + * a dynamic shared memory the ability to find the bare minimum number of |
| 157 | + * pointers that they need to bootstrap. If you're storing a lot of stuff in |
| 158 | + * here, you're doing it wrong. |
| 159 | + */ |
| 160 | +void |
| 161 | +shm_toc_insert(shm_toc*toc,uint64key,void*address) |
| 162 | +{ |
| 163 | +volatileshm_toc*vtoc=toc; |
| 164 | +uint64total_bytes; |
| 165 | +uint64allocated_bytes; |
| 166 | +uint64nentry; |
| 167 | +uint64toc_bytes; |
| 168 | +uint64offset; |
| 169 | + |
| 170 | +/* Relativize pointer. */ |
| 171 | +Assert(address> (void*)toc); |
| 172 | +offset= ((char*)address)- (char*)toc; |
| 173 | + |
| 174 | +SpinLockAcquire(&toc->toc_mutex); |
| 175 | + |
| 176 | +total_bytes=vtoc->toc_total_bytes; |
| 177 | +allocated_bytes=vtoc->toc_allocated_bytes; |
| 178 | +nentry=vtoc->toc_nentry; |
| 179 | +toc_bytes= offsetof(shm_toc,toc_entry)+nentry*sizeof(shm_toc_entry) |
| 180 | ++allocated_bytes; |
| 181 | + |
| 182 | +/* Check for memory exhaustion and overflow. */ |
| 183 | +if (toc_bytes+sizeof(shm_toc_entry)>total_bytes|| |
| 184 | +toc_bytes+sizeof(shm_toc_entry)<toc_bytes) |
| 185 | +{ |
| 186 | +SpinLockRelease(&toc->toc_mutex); |
| 187 | +ereport(ERROR, |
| 188 | +(errcode(ERRCODE_OUT_OF_MEMORY), |
| 189 | +errmsg("out of shared memory"))); |
| 190 | +} |
| 191 | + |
| 192 | +Assert(offset<total_bytes); |
| 193 | +vtoc->toc_entry[nentry].key=key; |
| 194 | +vtoc->toc_entry[nentry].offset=offset; |
| 195 | + |
| 196 | +/* |
| 197 | + * By placing a write barrier after filling in the entry and before |
| 198 | + * updating the number of entries, we make it safe to read the TOC |
| 199 | + * unlocked. |
| 200 | + */ |
| 201 | +pg_write_barrier(); |
| 202 | + |
| 203 | +vtoc->toc_nentry++; |
| 204 | + |
| 205 | +SpinLockRelease(&toc->toc_mutex); |
| 206 | +} |
| 207 | + |
| 208 | +/* |
| 209 | + * Look up a TOC entry. |
| 210 | + * |
| 211 | + * Unlike the other functions in this file, this operation acquires no lock; |
| 212 | + * it uses only barriers. It probably wouldn't hurt concurrency very much even |
| 213 | + * if it did get a lock, but since it's reasonably likely that a group of |
| 214 | + * worker processes could each read a series of entries from the same TOC |
| 215 | + * right around the same time, there seems to be some value in avoiding it. |
| 216 | + */ |
| 217 | +void* |
| 218 | +shm_toc_lookup(shm_toc*toc,uint64key) |
| 219 | +{ |
| 220 | +uint64nentry; |
| 221 | +uint64i; |
| 222 | + |
| 223 | +/* Read the number of entries before we examine any entry. */ |
| 224 | +nentry=toc->toc_nentry; |
| 225 | +pg_read_barrier(); |
| 226 | + |
| 227 | +/* Now search for a matching entry. */ |
| 228 | +for (i=0;i<nentry;++i) |
| 229 | +if (toc->toc_entry[i].key==key) |
| 230 | +return ((char*)toc)+toc->toc_entry[i].offset; |
| 231 | + |
| 232 | +/* No matching entry was found. */ |
| 233 | +returnNULL; |
| 234 | +} |
| 235 | + |
| 236 | +/* |
| 237 | + * Estimate how much shared memory will be required to store a TOC and its |
| 238 | + * dependent data structures. |
| 239 | + */ |
| 240 | +Size |
| 241 | +shm_toc_estimate(shm_toc_estimator*e) |
| 242 | +{ |
| 243 | +returnadd_size(offsetof(shm_toc,toc_entry), |
| 244 | +add_size(mul_size(e->number_of_keys,sizeof(shm_toc_entry)), |
| 245 | +e->space_for_chunks)); |
| 246 | +} |