1919#include <signal.h>
2020#include <unistd.h>
2121#include <sys/file.h>
22+ #include <sys/mman.h>
2223#include <sys/stat.h>
2324#ifdef HAVE_SYS_IPC_H
2425#include <sys/ipc.h>
@@ -43,9 +44,22 @@ typedef int IpcMemoryId;/* shared memory ID returned by shmget(2) */
4344#define PG_SHMAT_FLAGS 0
4445#endif
4546
47+ /* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */
48+ #ifndef MAP_ANONYMOUS
49+ #define MAP_ANONYMOUS MAP_ANON
50+ #endif
51+
52+ /* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */
53+ #ifndef MAP_HASSEMAPHORE
54+ #define MAP_HASSEMAPHORE 0
55+ #endif
56+
57+ #define PG_MMAP_FLAGS (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
4658
4759unsigned long UsedShmemSegID = 0 ;
4860void * UsedShmemSegAddr = NULL ;
61+ static Size AnonymousShmemSize ;
62+ static PGShmemHeader * AnonymousShmem ;
4963
5064static void * InternalIpcMemoryCreate (IpcMemoryKey memKey ,Size size );
5165static void IpcMemoryDetach (int status ,Datum shmaddr );
@@ -218,8 +232,13 @@ InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
218232static void
219233IpcMemoryDetach (int status ,Datum shmaddr )
220234{
235+ /* Detach System V shared memory block. */
221236if (shmdt (DatumGetPointer (shmaddr ))< 0 )
222237elog (LOG ,"shmdt(%p) failed: %m" ,DatumGetPointer (shmaddr ));
238+ /* Release anonymous shared memory block, if any. */
239+ if (AnonymousShmem != NULL
240+ && munmap (AnonymousShmem ,AnonymousShmemSize )< 0 )
241+ elog (LOG ,"munmap(%p) failed: %m" ,AnonymousShmem );
223242}
224243
225244/****************************************************************************/
@@ -357,10 +376,59 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
357376PGShmemHeader * hdr ;
358377IpcMemoryId shmid ;
359378struct stat statbuf ;
379+ Size allocsize = size ;
360380
361381/* Room for a header? */
362382Assert (size > MAXALIGN (sizeof (PGShmemHeader )));
363383
384+ /*
385+ * As of PostgreSQL 9.3, we normally allocate only a very small amount of
386+ * System V shared memory, and only for the purposes of providing an
387+ * interlock to protect the data directory. The real shared memory block
388+ * is allocated using mmap(). This works around the problem that many
389+ * systems have very low limits on the amount of System V shared memory
390+ * that can be allocated. Even a limit of a few megabytes will be enough
391+ * to run many copies of PostgreSQL without needing to adjust system
392+ * settings.
393+ *
394+ * However, we disable this logic in the EXEC_BACKEND case, and fall back
395+ * to the old method of allocating the entire segment using System V shared
396+ * memory, because there's no way to attach an mmap'd segment to a process
397+ * after exec(). Since EXEC_BACKEND is intended only for developer use,
398+ * this shouldn't be a big problem.
399+ */
400+ #ifndef EXEC_BACKEND
401+ {
402+ long pagesize = sysconf (_SC_PAGE_SIZE );
403+
404+ /*
405+ * pagesize will, for practical purposes, always be a power of two.
406+ * But just in case it isn't, we do it this way instead of using
407+ * TYPEALIGN().
408+ */
409+ AnonymousShmemSize = size ;
410+ if (size %pagesize != 0 )
411+ AnonymousShmemSize += pagesize - (size %pagesize );
412+
413+ /*
414+ * We assume that no one will attempt to run PostgreSQL 9.3 or later
415+ * on systems that are ancient enough that anonymous shared memory is
416+ * not supported, such as pre-2.4 versions of Linux. If that turns out
417+ * to be false, we might need to add a run-time test here and do this
418+ * only if the running kernel supports it.
419+ */
420+ AnonymousShmem = mmap (NULL ,size ,PROT_READ |PROT_WRITE ,PG_MMAP_FLAGS ,
421+ -1 ,0 );
422+ if (AnonymousShmem == NULL )
423+ ereport (FATAL ,
424+ (errmsg ("could not map %lu bytes of anonymous shared memory: %m" ,
425+ (unsigned long )AnonymousShmemSize )));
426+
427+ /* Now we can allocate a minimal SHM block. */
428+ allocsize = sizeof (PGShmemHeader );
429+ }
430+ #endif
431+
364432/* Make sure PGSharedMemoryAttach doesn't fail without need */
365433UsedShmemSegAddr = NULL ;
366434
@@ -370,7 +438,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
370438for (NextShmemSegID ++ ;;NextShmemSegID ++ )
371439{
372440/* Try to create new segment */
373- memAddress = InternalIpcMemoryCreate (NextShmemSegID ,size );
441+ memAddress = InternalIpcMemoryCreate (NextShmemSegID ,allocsize );
374442if (memAddress )
375443break ;/* successful create and attach */
376444
@@ -409,7 +477,7 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
409477/*
410478 * Now try again to create the segment.
411479 */
412- memAddress = InternalIpcMemoryCreate (NextShmemSegID ,size );
480+ memAddress = InternalIpcMemoryCreate (NextShmemSegID ,allocsize );
413481if (memAddress )
414482break ;/* successful create and attach */
415483
@@ -448,7 +516,17 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port)
448516UsedShmemSegAddr = memAddress ;
449517UsedShmemSegID = (unsigned long )NextShmemSegID ;
450518
451- return hdr ;
519+ /*
520+ * If AnonymousShmem is NULL here, then we're not using anonymous shared
521+ * memory, and should return a pointer to the System V shared memory block.
522+ * Otherwise, the System V shared memory block is only a shim, and we must
523+ * return a pointer to the real block.
524+ */
525+ if (AnonymousShmem == NULL )
526+ return hdr ;
527+ memcpy (AnonymousShmem ,hdr ,sizeof (PGShmemHeader ));
528+ return AnonymousShmem ;
529+
452530}
453531
454532#ifdef EXEC_BACKEND
@@ -516,6 +594,11 @@ PGSharedMemoryDetach(void)
516594elog (LOG ,"shmdt(%p) failed: %m" ,UsedShmemSegAddr );
517595UsedShmemSegAddr = NULL ;
518596}
597+
598+ /* Release anonymous shared memory block, if any. */
599+ if (AnonymousShmem != NULL
600+ && munmap (AnonymousShmem ,AnonymousShmemSize )< 0 )
601+ elog (LOG ,"munmap(%p) failed: %m" ,AnonymousShmem );
519602}
520603
521604