forked from torvalds/linux

Commit 9a98225

kiryl authored and torvalds committed
thp: introduce deferred_split_huge_page()
Currently we don't split huge pages on partial unmap. That is not an ideal situation: it can lead to memory overhead.

Fortunately, we can detect partial unmap in page_remove_rmap(). But we cannot call split_huge_page() from there due to the locking context. It is also counterproductive to do it directly from the munmap() codepath: in many cases we get there from exit(2), and splitting the huge page just to free it up in small pages is not what we really want.

This patch introduces deferred_split_huge_page(), which puts the huge page into a queue for splitting. The splitting itself happens when we get memory pressure, via the shrinker interface. The page is dropped from the list on freeing, through the compound page destructor.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Steve Capper <steve.capper@linaro.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 248db92 · commit 9a98225
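In outline: partial unmap is detected in page_remove_rmap(), the huge page is queued under a spinlock (cheap enough for that context), and a shrinker later drains the queue and performs the expensive split. Below is a minimal, self-contained userspace sketch of that enqueue-then-drain pattern, for orientation only — all names (hpage, deferred_split, shrink_deferred_queue) are illustrative, not kernel API; the real code is in the diff that follows.

/*
 * Userspace sketch of the deferred-split pattern: enqueue is O(1) and
 * only takes the queue lock; the expensive work happens later in a
 * "shrinker" pass with no other locks held.
 */
#include <pthread.h>
#include <stdio.h>

struct hpage {
	int id;
	int queued;			/* already on the deferred queue? */
	struct hpage *next;		/* queue linkage */
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct hpage *queue_head;	/* pages awaiting split */

/* Analogue of deferred_split_huge_page(): queue only, never split here. */
static void deferred_split(struct hpage *page)
{
	pthread_mutex_lock(&queue_lock);
	if (!page->queued) {
		page->next = queue_head;
		queue_head = page;
		page->queued = 1;
	}
	pthread_mutex_unlock(&queue_lock);
}

/* Analogue of deferred_split_scan(): drain the queue under "pressure". */
static unsigned long shrink_deferred_queue(void)
{
	struct hpage *page;
	unsigned long split = 0;

	pthread_mutex_lock(&queue_lock);
	page = queue_head;
	queue_head = NULL;
	pthread_mutex_unlock(&queue_lock);

	while (page) {
		struct hpage *next = page->next;

		printf("splitting huge page %d\n", page->id);
		page->queued = 0;
		page->next = NULL;
		split++;
		page = next;
	}
	return split;
}

int main(void)
{
	struct hpage a = { .id = 1 }, b = { .id = 2 };

	deferred_split(&a);
	deferred_split(&b);
	deferred_split(&a);	/* duplicate enqueue is a no-op */
	printf("split %lu pages\n", shrink_deferred_queue());
	return 0;
}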

7 files changed (+174, -12 lines)

include/linux/huge_mm.h

Lines changed: 5 additions & 0 deletions

@@ -90,11 +90,15 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
 
 extern unsigned long transparent_hugepage_flags;
 
+extern void prep_transhuge_page(struct page *page);
+extern void free_transhuge_page(struct page *page);
+
 int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
 	return split_huge_page_to_list(page, NULL);
 }
+void deferred_split_huge_page(struct page *page);
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long address);
@@ -170,6 +174,7 @@ static inline int split_huge_page(struct page *page)
 {
 	return 0;
 }
+static inline void deferred_split_huge_page(struct page *page) {}
 #define split_huge_pmd(__vma, __pmd, __address)	\
 	do { } while (0)
 static inline int hugepage_madvise(struct vm_area_struct *vma,

include/linux/mm.h

Lines changed: 5 additions & 0 deletions

@@ -507,6 +507,9 @@ enum compound_dtor_id {
 	COMPOUND_PAGE_DTOR,
 #ifdef CONFIG_HUGETLB_PAGE
 	HUGETLB_PAGE_DTOR,
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	TRANSHUGE_PAGE_DTOR,
 #endif
 	NR_COMPOUND_DTORS,
 };
@@ -537,6 +540,8 @@ static inline void set_compound_order(struct page *page, unsigned int order)
 	page[1].compound_order = order;
 }
 
+void free_compound_page(struct page *page);
+
 #ifdef CONFIG_MMU
 /*
  * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when
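For context on how the new TRANSHUGE_PAGE_DTOR entry is consulted: the destructor id is stored in the first tail page by set_compound_page_dtor() and looked up in the compound_page_dtors[] table when the compound page is finally freed. The helpers below are paraphrased from include/linux/mm.h of this era as a reading aid; they are not part of this diff.

static inline void set_compound_page_dtor(struct page *page,
		enum compound_dtor_id compound_dtor)
{
	VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page);
	page[1].compound_dtor = compound_dtor;	/* id lives in first tail page */
}

static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
{
	VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page);
	return compound_page_dtors[page[1].compound_dtor];
}

So once prep_transhuge_page() stores TRANSHUGE_PAGE_DTOR, the final put of a THP dispatches to free_transhuge_page(), which unlinks the page from the split queue before handing it to free_compound_page().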

include/linux/mm_types.h

Lines changed: 2 additions & 0 deletions

@@ -55,13 +55,15 @@ struct page {
 						 */
 		void *s_mem;			/* slab first object */
 		atomic_t compound_mapcount;	/* first tail page */
+		/* page_deferred_list().next -- second tail page */
 	};
 
 	/* Second double word */
 	struct {
 		union {
 			pgoff_t index;		/* Our offset within mapping. */
 			void *freelist;		/* sl[aou]b first free object */
+			/* page_deferred_list().prev -- second tail page */
 		};
 
 		union {
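These two comments mark the fields that page_deferred_list() (added in mm/huge_memory.c below) reinterprets: casting &page[2].mapping to a struct list_head * makes the second tail page's ->mapping serve as .next and the adjacent ->index as .prev. A compile-time sketch of the layout assumption, using a simplified stand-in for struct page (illustration only, not kernel code):

#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

struct page_layout {		/* simplified stand-in for struct page */
	unsigned long flags;
	void *mapping;		/* overlays list_head.next */
	unsigned long index;	/* overlays list_head.prev */
};

/* The cast is only valid if the two fields are adjacent pointer-sized
 * words, exactly mirroring list_head's two members. */
_Static_assert(offsetof(struct page_layout, index) ==
	       offsetof(struct page_layout, mapping) + sizeof(void *),
	       "mapping/index must be adjacent for the list_head overlay");
_Static_assert(sizeof(struct list_head) == 2 * sizeof(void *),
	       "list_head is two pointers");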

mm/huge_memory.c

Lines changed: 135 additions & 4 deletions

@@ -135,6 +135,10 @@ static struct khugepaged_scan khugepaged_scan = {
 	.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
 };
 
+static DEFINE_SPINLOCK(split_queue_lock);
+static LIST_HEAD(split_queue);
+static unsigned long split_queue_len;
+static struct shrinker deferred_split_shrinker;
 
 static void set_recommended_min_free_kbytes(void)
 {
@@ -667,6 +671,9 @@ static int __init hugepage_init(void)
 	err = register_shrinker(&huge_zero_page_shrinker);
 	if (err)
 		goto err_hzp_shrinker;
+	err = register_shrinker(&deferred_split_shrinker);
+	if (err)
+		goto err_split_shrinker;
 
 	/*
 	 * By default disable transparent hugepages on smaller systems,
@@ -684,6 +691,8 @@ static int __init hugepage_init(void)
 
 	return 0;
 err_khugepaged:
+	unregister_shrinker(&deferred_split_shrinker);
+err_split_shrinker:
 	unregister_shrinker(&huge_zero_page_shrinker);
 err_hzp_shrinker:
 	khugepaged_slab_exit();
@@ -740,6 +749,27 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 	return entry;
 }
 
+static inline struct list_head *page_deferred_list(struct page *page)
+{
+	/*
+	 * ->lru in the tail pages is occupied by compound_head.
+	 * Let's use ->mapping + ->index in the second tail page as list_head.
+	 */
+	return (struct list_head *)&page[2].mapping;
+}
+
+void prep_transhuge_page(struct page *page)
+{
+	/*
+	 * we use page->mapping and page->index in second tail page
+	 * as list_head: assuming THP order >= 2
+	 */
+	BUILD_BUG_ON(HPAGE_PMD_ORDER < 2);
+
+	INIT_LIST_HEAD(page_deferred_list(page));
+	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
+}
+
 static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 			struct vm_area_struct *vma,
 			unsigned long address, pmd_t *pmd,
@@ -896,6 +926,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
+	prep_transhuge_page(page);
 	return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
 					    flags);
 }
@@ -1192,7 +1223,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	} else
 		new_page = NULL;
 
-	if (unlikely(!new_page)) {
+	if (likely(new_page)) {
+		prep_transhuge_page(new_page);
+	} else {
 		if (!page) {
 			split_huge_pmd(vma, pmd, address);
 			ret |= VM_FAULT_FALLBACK;
@@ -2109,6 +2142,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
 		return NULL;
 	}
 
+	prep_transhuge_page(*hpage);
 	count_vm_event(THP_COLLAPSE_ALLOC);
 	return *hpage;
 }
@@ -2120,8 +2154,12 @@ static int khugepaged_find_target_node(void)
 
 static inline struct page *alloc_hugepage(int defrag)
 {
-	return alloc_pages(alloc_hugepage_gfpmask(defrag, 0),
-			   HPAGE_PMD_ORDER);
+	struct page *page;
+
+	page = alloc_pages(alloc_hugepage_gfpmask(defrag, 0), HPAGE_PMD_ORDER);
+	if (page)
+		prep_transhuge_page(page);
+	return page;
 }
 
 static struct page *khugepaged_alloc_hugepage(bool *wait)
@@ -3098,7 +3136,7 @@ static int __split_huge_page_tail(struct page *head, int tail,
 	set_page_idle(page_tail);
 
 	/* ->mapping in first tail page is compound_mapcount */
-	VM_BUG_ON_PAGE(tail != 1 && page_tail->mapping != TAIL_MAPPING,
+	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
 			page_tail);
 	page_tail->mapping = head->mapping;
 
@@ -3207,19 +3245,28 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	freeze_page(anon_vma, head);
 	VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
+	/* Prevent deferred_split_scan() touching ->_count */
+	spin_lock(&split_queue_lock);
 	count = page_count(head);
 	mapcount = total_mapcount(head);
 	if (mapcount == count - 1) {
+		if (!list_empty(page_deferred_list(head))) {
+			split_queue_len--;
+			list_del(page_deferred_list(head));
+		}
+		spin_unlock(&split_queue_lock);
 		__split_huge_page(page, list);
 		ret = 0;
 	} else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount > count - 1) {
+		spin_unlock(&split_queue_lock);
 		pr_alert("total_mapcount: %u, page_count(): %u\n",
 				mapcount, count);
 		if (PageTail(page))
 			dump_page(head, NULL);
 		dump_page(page, "total_mapcount(head) > page_count(head) - 1");
 		BUG();
 	} else {
+		spin_unlock(&split_queue_lock);
 		unfreeze_page(anon_vma, head);
 		ret = -EBUSY;
 	}
@@ -3231,3 +3278,87 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
 	return ret;
 }
+
+void free_transhuge_page(struct page *page)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&split_queue_lock, flags);
+	if (!list_empty(page_deferred_list(page))) {
+		split_queue_len--;
+		list_del(page_deferred_list(page));
+	}
+	spin_unlock_irqrestore(&split_queue_lock, flags);
+	free_compound_page(page);
+}
+
+void deferred_split_huge_page(struct page *page)
+{
+	unsigned long flags;
+
+	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+
+	spin_lock_irqsave(&split_queue_lock, flags);
+	if (list_empty(page_deferred_list(page))) {
+		list_add_tail(page_deferred_list(page), &split_queue);
+		split_queue_len++;
+	}
+	spin_unlock_irqrestore(&split_queue_lock, flags);
+}
+
+static unsigned long deferred_split_count(struct shrinker *shrink,
+		struct shrink_control *sc)
+{
+	/*
+	 * Splitting a page from split_queue will free up at least one page,
+	 * at most HPAGE_PMD_NR - 1. We don't track the exact number.
+	 * Let's use HPAGE_PMD_NR / 2 as ballpark.
+	 */
+	return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+}
+
+static unsigned long deferred_split_scan(struct shrinker *shrink,
+		struct shrink_control *sc)
+{
+	unsigned long flags;
+	LIST_HEAD(list), *pos, *next;
+	struct page *page;
+	int split = 0;
+
+	spin_lock_irqsave(&split_queue_lock, flags);
+	list_splice_init(&split_queue, &list);
+
+	/* Take pin on all head pages to avoid freeing them under us */
+	list_for_each_safe(pos, next, &list) {
+		page = list_entry((void *)pos, struct page, mapping);
+		page = compound_head(page);
+		/* race with put_compound_page() */
+		if (!get_page_unless_zero(page)) {
+			list_del_init(page_deferred_list(page));
+			split_queue_len--;
+		}
+	}
+	spin_unlock_irqrestore(&split_queue_lock, flags);
+
+	list_for_each_safe(pos, next, &list) {
+		page = list_entry((void *)pos, struct page, mapping);
+		lock_page(page);
+		/* split_huge_page() removes page from list on success */
+		if (!split_huge_page(page))
+			split++;
+		unlock_page(page);
+		put_page(page);
+	}
+
+	spin_lock_irqsave(&split_queue_lock, flags);
+	list_splice_tail(&list, &split_queue);
+	spin_unlock_irqrestore(&split_queue_lock, flags);
+
+	return split * HPAGE_PMD_NR / 2;
+}
+
+static struct shrinker deferred_split_shrinker = {
+	.count_objects = deferred_split_count,
+	.scan_objects = deferred_split_scan,
+	.seeks = DEFAULT_SEEKS,
+};
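For reference, the shrinker contract the code above follows: ->count_objects returns a cheap estimate of freeable objects (0 tells the VM to skip the scan), and ->scan_objects is then asked to free up to sc->nr_to_scan of them and report how many went away. deferred_split_scan() deviates slightly: it drains the whole queue regardless of nr_to_scan and reports split * HPAGE_PMD_NR / 2 as a ballpark. A minimal, hypothetical module skeleton using the same-era API (demo_* names are made up; not part of this commit):

#include <linux/module.h>
#include <linux/shrinker.h>

static unsigned long demo_len;	/* objects we could free (demo bookkeeping) */

static unsigned long demo_count(struct shrinker *shrink,
		struct shrink_control *sc)
{
	/* Cheap estimate of freeable objects; 0 means "skip the scan". */
	return demo_len;
}

static unsigned long demo_scan(struct shrinker *shrink,
		struct shrink_control *sc)
{
	unsigned long freed = 0;

	/* Free up to sc->nr_to_scan objects, report how many went away. */
	while (demo_len && freed < sc->nr_to_scan) {
		demo_len--;	/* a real shrinker would free an object here */
		freed++;
	}
	return freed;
}

static struct shrinker demo_shrinker = {
	.count_objects	= demo_count,
	.scan_objects	= demo_scan,
	.seeks		= DEFAULT_SEEKS,
};

static int __init demo_init(void)
{
	demo_len = 32;	/* pretend we start with 32 freeable objects */
	return register_shrinker(&demo_shrinker);
}

static void __exit demo_exit(void)
{
	unregister_shrinker(&demo_shrinker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");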

mm/migrate.c

Lines changed: 1 addition & 0 deletions

@@ -1760,6 +1760,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 					  HPAGE_PMD_ORDER);
 	if (!new_page)
 		goto out_fail;
+	prep_transhuge_page(new_page);
 
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated) {

mm/page_alloc.c

Lines changed: 20 additions & 7 deletions

@@ -222,13 +222,15 @@ static char * const zone_names[MAX_NR_ZONES] = {
 #endif
 };
 
-static void free_compound_page(struct page *page);
 compound_page_dtor * const compound_page_dtors[] = {
 	NULL,
 	free_compound_page,
 #ifdef CONFIG_HUGETLB_PAGE
 	free_huge_page,
 #endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	free_transhuge_page,
+#endif
 };
 
 int min_free_kbytes = 1024;
@@ -450,7 +452,7 @@ static void bad_page(struct page *page, const char *reason,
  * This usage means that zero-order pages may not be compound.
  */
 
-static void free_compound_page(struct page *page)
+void free_compound_page(struct page *page)
 {
 	__free_pages_ok(page, compound_order(page));
 }
@@ -858,15 +860,26 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
 		ret = 0;
 		goto out;
 	}
-	/* mapping in first tail page is used for compound_mapcount() */
-	if (page - head_page == 1) {
+	switch (page - head_page) {
+	case 1:
+		/* the first tail page: ->mapping is compound_mapcount() */
 		if (unlikely(compound_mapcount(page))) {
 			bad_page(page, "nonzero compound_mapcount", 0);
 			goto out;
 		}
-	} else if (page->mapping != TAIL_MAPPING) {
-		bad_page(page, "corrupted mapping in tail page", 0);
-		goto out;
+		break;
+	case 2:
+		/*
+		 * the second tail page: ->mapping is
+		 * page_deferred_list().next -- ignore value.
+		 */
+		break;
+	default:
+		if (page->mapping != TAIL_MAPPING) {
+			bad_page(page, "corrupted mapping in tail page", 0);
+			goto out;
+		}
+		break;
 	}
 	if (unlikely(!PageTail(page))) {
 		bad_page(page, "PageTail not set", 0);

mm/rmap.c

Lines changed: 6 additions & 1 deletion

@@ -1282,8 +1282,10 @@ static void page_remove_anon_compound_rmap(struct page *page)
 		nr = HPAGE_PMD_NR;
 	}
 
-	if (nr)
+	if (nr) {
 		__mod_zone_page_state(page_zone(page), NR_ANON_PAGES, -nr);
+		deferred_split_huge_page(page);
+	}
 }
 
 /**
@@ -1318,6 +1320,9 @@ void page_remove_rmap(struct page *page, bool compound)
 	if (unlikely(PageMlocked(page)))
 		clear_page_mlock(page);
 
+	if (PageTransCompound(page))
+		deferred_split_huge_page(compound_head(page));
+
 	/*
 	 * It would be tidy to reset the PageAnon mapping here,
 	 * but that might overwrite a racing page_add_anon_rmap