Commit 56f3547

ftang1

authored and

torvalds

committed

mm: adjust vm_committed_as_batch according to vm overcommit policy

When checking a performance change for will-it-scale scalability mmap test [1], we found very high lock contention for spinlock of percpu counter 'vm_committed_as':

    94.14%  0.35%  [kernel.kallsyms]  [k] _raw_spin_lock_irqsave
    48.21% _raw_spin_lock_irqsave;percpu_counter_add_batch;__vm_enough_memory;mmap_region;do_mmap;
    45.91% _raw_spin_lock_irqsave;percpu_counter_add_batch;__do_munmap;

Actually this heavy lock contention is not always necessary. The 'vm_committed_as' needs to be very precise when the strict OVERCOMMIT_NEVER policy is set, which requires a rather small batch number for the percpu counter.

So keep 'batch' number unchanged for strict OVERCOMMIT_NEVER policy, and lift it to 64X for OVERCOMMIT_ALWAYS and OVERCOMMIT_GUESS policies. Also add a sysctl handler to adjust it when the policy is reconfigured.

Benchmark with the same testcase in [1] shows 53% improvement on a 8C/16T desktop, and 2097% (20X) on a 4S/72C/144T server. We tested with test platforms in 0day (server, desktop and laptop), and 80%+ platforms shows improvements with that test. And whether it shows improvements depends on if the test mmap size is bigger than the batch number computed.

And if the lift is 16X, 1/3 of the platforms will show improvements, though it should help the mmap/unmap usage generally, as Michal Hocko mentioned:

: I believe that there are non-synthetic workloads which would benefit from
: a larger batch. E.g.
: large in memory databases which do large mmaps
: during startups from multiple threads.

[1] https://lore.kernel.org/lkml/20200305062138.GI5972@shao2-debian/

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Qian Cai <cai@lca.pw>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: kernel test robot <rong.a.chen@intel.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1589611660-89854-4-git-send-email-feng.tang@intel.com
Link: http://lkml.kernel.org/r/1592725000-73486-4-git-send-email-feng.tang@intel.com
Link: http://lkml.kernel.org/r/1594389708-60781-5-git-send-email-feng.tang@intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

1 parent 0a4954a, commit 56f3547 (copy full SHA for 56f3547)

File tree

5 files changed

+64

-7

lines changed

include/linux
- mm.h
- mman.h
kernel
- sysctl.c
mm
- mm_init.c
- util.c

5 files changed

+64

-7

lines changed

`‎include/linux/mm.h‎`

Lines changed: 2 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -206,6 +206,8 @@ int overcommit_ratio_handler(struct ctl_table , int, void , size_t *,`
`206`	`206`	`loff_t*);`
`207`	`207`	`intovercommit_kbytes_handler(structctl_table,int,void,size_t*,`
`208`	`208`	`loff_t*);`
	`209`	`+intovercommit_policy_handler(structctl_table,int,void,size_t*,`
	`210`	`+loff_t*);`
`209`	`211`
`210`	`212`	`#definenth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))`
`211`	`213`

`‎include/linux/mman.h‎`

Lines changed: 4 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -57,8 +57,12 @@ extern struct percpu_counter vm_committed_as;`
`57`	`57`
`58`	`58`	`#ifdefCONFIG_SMP`
`59`	`59`	`externs32vm_committed_as_batch;`
	`60`	`+externvoidmm_compute_batch(intovercommit_policy);`
`60`	`61`	`#else`
`61`	`62`	`#definevm_committed_as_batch 0`
	`63`	`+staticinlinevoidmm_compute_batch(intovercommit_policy)`
	`64`	`+{`
	`65`	`+}`
`62`	`66`	`#endif`
`63`	`67`
`64`	`68`	`unsigned longvm_memory_committed(void);`

`‎kernel/sysctl.c‎`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -2671,7 +2671,7 @@ static struct ctl_table vm_table[] = {`
`2671`	`2671`	`.data=&sysctl_overcommit_memory,`
`2672`	`2672`	`.maxlen=sizeof(sysctl_overcommit_memory),`
`2673`	`2673`	`.mode=0644,`
`2674`		`-.proc_handler=proc_dointvec_minmax,`
	`2674`	`+.proc_handler=overcommit_policy_handler,`
`2675`	`2675`	`.extra1=SYSCTL_ZERO,`
`2676`	`2676`	`.extra2=&two,`
`2677`	`2677`	`},`

`‎mm/mm_init.c‎`

Lines changed: 16 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@`
`13`	`13`	`#include<linux/memory.h>`
`14`	`14`	`#include<linux/notifier.h>`
`15`	`15`	`#include<linux/sched.h>`
	`16`	`+#include<linux/mman.h>`
`16`	`17`	`#include"internal.h"`
`17`	`18`
`18`	`19`	`#ifdefCONFIG_DEBUG_MEMORY_INIT`
`@@ -144,14 +145,23 @@ EXPORT_SYMBOL_GPL(mm_kobj);`
`144`	`145`	`#ifdefCONFIG_SMP`
`145`	`146`	`s32vm_committed_as_batch=32;`
`146`	`147`
`147`		`-staticvoid__meminitmm_compute_batch(void)`
	`148`	`+voidmm_compute_batch(intovercommit_policy)`
`148`	`149`	`{`
`149`	`150`	`u64memsized_batch;`
`150`	`151`	`s32nr=num_present_cpus();`
`151`	`152`	`s32batch=max_t(s32,nr*2,32);`
`152`		`-`
`153`		`-/* batch size set to 0.4% of (total memory/#cpus), or max int32 */`
`154`		`-memsized_batch=min_t(u64, (totalram_pages()/nr)/256,0x7fffffff);`
	`153`	`+unsigned longram_pages=totalram_pages();`
	`154`	`+`
	`155`	`+/*`
	`156`	`+ * For policy OVERCOMMIT_NEVER, set batch size to 0.4% of`
	`157`	`+ * (total memory/#cpus), and lift it to 25% for other policies`
	`158`	`+ * to easy the possible lock contention for percpu_counter`
	`159`	`+ * vm_committed_as, while the max limit is INT_MAX`
	`160`	`+ */`
	`161`	`+if (overcommit_policy==OVERCOMMIT_NEVER)`
	`162`	`+memsized_batch=min_t(u64,ram_pages/nr/256,INT_MAX);`
	`163`	`+else`
	`164`	`+memsized_batch=min_t(u64,ram_pages/nr/4,INT_MAX);`
`155`	`165`
`156`	`166`	`vm_committed_as_batch=max_t(s32,memsized_batch,batch);`
`157`	`167`	`}`
`@@ -162,7 +172,7 @@ static int __meminit mm_compute_batch_notifier(struct notifier_block *self,`
`162`	`172`	`switch (action) {`
`163`	`173`	`caseMEM_ONLINE:`
`164`	`174`	`caseMEM_OFFLINE:`
`165`		`-mm_compute_batch();`
	`175`	`+mm_compute_batch(sysctl_overcommit_memory);`
`166`	`176`	`default:`
`167`	`177`	`break;`
`168`	`178`	`}`
`@@ -176,7 +186,7 @@ static struct notifier_block compute_batch_nb __meminitdata = {`
`176`	`186`
`177`	`187`	`staticint__initmm_compute_batch_init(void)`
`178`	`188`	`{`
`179`		`-mm_compute_batch();`
	`189`	`+mm_compute_batch(sysctl_overcommit_memory);`
`180`	`190`	`register_hotmemory_notifier(&compute_batch_nb);`
`181`	`191`
`182`	`192`	`return0;`

`‎mm/util.c‎`

Lines changed: 41 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -746,6 +746,47 @@ int overcommit_ratio_handler(struct ctl_table table, int write, void buffer,`
`746`	`746`	`returnret;`
`747`	`747`	`}`
`748`	`748`
	`749`	`+staticvoidsync_overcommit_as(structwork_struct*dummy)`
	`750`	`+{`
	`751`	`+percpu_counter_sync(&vm_committed_as);`
	`752`	`+}`
	`753`	`+`
	`754`	`+intovercommit_policy_handler(structctl_tabletable,intwrite,voidbuffer,`
	`755`	`+size_tlenp,loff_tppos)`
	`756`	`+{`
	`757`	`+structctl_tablet;`
	`758`	`+intnew_policy;`
	`759`	`+intret;`
	`760`	`+`
	`761`	`+/*`
	`762`	`+ * The deviation of sync_overcommit_as could be big with loose policy`
	`763`	`+ * like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to`
	`764`	`+ * strict OVERCOMMIT_NEVER, we need to reduce the deviation to comply`
	`765`	`+ * with the strict "NEVER", and to avoid possible race condtion (even`
	`766`	`+ * though user usually won't too frequently do the switching to policy`
	`767`	`+ * OVERCOMMIT_NEVER), the switch is done in the following order:`
	`768`	`+ *1. changing the batch`
	`769`	`+ *2. sync percpu count on each CPU`
	`770`	`+ *3. switch the policy`
	`771`	`+ */`
	`772`	`+if (write) {`
	`773`	`+t=*table;`
	`774`	`+t.data=&new_policy;`
	`775`	`+ret=proc_dointvec_minmax(&t,write,buffer,lenp,ppos);`
	`776`	`+if (ret)`
	`777`	`+returnret;`
	`778`	`+`
	`779`	`+mm_compute_batch(new_policy);`
	`780`	`+if (new_policy==OVERCOMMIT_NEVER)`
	`781`	`+schedule_on_each_cpu(sync_overcommit_as);`
	`782`	`+sysctl_overcommit_memory=new_policy;`
	`783`	`+}else {`
	`784`	`+ret=proc_dointvec_minmax(table,write,buffer,lenp,ppos);`
	`785`	`+}`
	`786`	`+`
	`787`	`+returnret;`
	`788`	`+}`
	`789`	`+`
`749`	`790`	`intovercommit_kbytes_handler(structctl_tabletable,intwrite,voidbuffer,`
`750`	`791`	`size_tlenp,loff_tppos)`
`751`	`792`	`{`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit56f3547

File tree

5 files changed

5 files changed

`‎include/linux/mm.h‎`

`‎include/linux/mman.h‎`

`‎kernel/sysctl.c‎`

`‎mm/mm_init.c‎`

`‎mm/util.c‎`

0 commit comments