
Commit 9edb0e6

Konstantin Khlebnikov authored and gregkh committed
mm/balloon_compaction: redesign ballooned pages management
commit d6d86c0 upstream.

Sasha Levin reported a KASAN splat inside isolate_migratepages_range().
The problem is in the function __is_movable_balloon_page(), which tests
AS_BALLOON_MAP in page->mapping->flags. This function has no protection
against anonymous pages, so it ended up checking address-space flags
inside a struct anon_vma.

Further investigation shows more problems in the current implementation:

* The special branch in __unmap_and_move() never works:
  balloon_page_movable() checks page flags and page_count. In
  __unmap_and_move() the page is locked and its reference counter is
  elevated, thus balloon_page_movable() always fails. As a result,
  execution goes to the normal migration path. virtballoon_migratepage()
  returns MIGRATEPAGE_BALLOON_SUCCESS instead of MIGRATEPAGE_SUCCESS;
  move_to_new_page() thinks this is an error code and assigns
  newpage->mapping to NULL. The newly migrated page loses its connection
  to the balloon and all ability for further migration.

* lru_lock is erroneously required in isolate_migratepages_range() for
  isolating a ballooned page. This function releases lru_lock
  periodically, which makes migration mostly impossible for some pages.

* balloon_page_dequeue has a tight race with balloon_page_isolate:
  balloon_page_isolate can be executed in parallel with dequeue between
  picking the page from the list and taking the page lock. The race is
  rare because both sides use trylock_page() for locking.

This patch fixes all of them.

Instead of a fake mapping with a special flag, this patch uses a special
state of page->_mapcount: PAGE_BALLOON_MAPCOUNT_VALUE = -256. The buddy
allocator uses PAGE_BUDDY_MAPCOUNT_VALUE = -128 for a similar purpose.
Storing the mark directly in struct page makes everything safer and
easier.

PagePrivate is used to mark pages present in the page list (i.e. not
isolated, like PageLRU for normal pages). It replaces the special rules
for the reference counter and makes balloon migration similar to the
migration of normal pages. This flag is protected by the page lock
together with the link to the balloon device.

Signed-off-by: Konstantin Khlebnikov <[email protected]>
Reported-by: Sasha Levin <[email protected]>
Link: http://lkml.kernel.org/p/[email protected]
Cc: Rafael Aquini <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent db96c43 commit 9edb0e6

7 files changed: +68 −118
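
For orientation before the per-file diffs: the lifecycle the patch establishes can be condensed as follows. This is a sketch distilled from the hunks below, not code taken verbatim from the patch:

	/*
	 * Balloon page lifecycle after this patch (condensed):
	 *
	 *   inflate:  balloon_page_insert()   __SetPageBalloon(), SetPagePrivate(),
	 *                                     list_add() onto the device page list
	 *   isolate:  balloon_page_isolate()  under page_lock: test PageBalloon()
	 *                                     && PagePrivate(), then
	 *                                     ClearPagePrivate() + list_del()
	 *   putback:  balloon_page_putback()  SetPagePrivate() + list_add()
	 *   deflate:  balloon_page_delete()   __ClearPageBalloon(); PagePrivate
	 *                                     decides whether list_del() is needed
	 */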

drivers/virtio/virtio_balloon.c

+7 −8
@@ -163,8 +163,8 @@ static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
 	/* Find pfns pointing at start of each page, get pages and free them. */
 	for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
 		struct page *page = balloon_pfn_to_page(pfns[i]);
-		balloon_page_free(page);
 		adjust_managed_page_count(page, 1);
+		put_page(page); /* balloon reference */
 	}
 }

@@ -395,6 +395,8 @@ static int virtballoon_migratepage(struct address_space *mapping,
 	if (!mutex_trylock(&vb->balloon_lock))
 		return -EAGAIN;

+	get_page(newpage); /* balloon reference */
+
 	/* balloon's page migration 1st step -- inflate "newpage" */
 	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
 	balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
@@ -404,20 +406,17 @@ static int virtballoon_migratepage(struct address_space *mapping,
 	set_page_pfns(vb->pfns, newpage);
 	tell_host(vb, vb->inflate_vq);

-	/*
-	 * balloon's page migration 2nd step -- deflate "page"
-	 *
-	 * It's safe to delete page->lru here because this page is at
-	 * an isolated migration list, and this step is expected to happen here
-	 */
+	/* balloon's page migration 2nd step -- deflate "page" */
 	balloon_page_delete(page);
 	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
 	set_page_pfns(vb->pfns, page);
 	tell_host(vb, vb->deflate_vq);

 	mutex_unlock(&vb->balloon_lock);

-	return MIGRATEPAGE_BALLOON_SUCCESS;
+	put_page(page); /* balloon reference */
+
+	return MIGRATEPAGE_SUCCESS;
 }

 /* define the balloon_mapping->a_ops callback to allow balloon page migration */

include/linux/balloon_compaction.h

+24 −73
@@ -27,10 +27,13 @@
  *	counter raised only while it is under our special handling;
  *
  * iii. after the lockless scan step have selected a potential balloon page for
- *      isolation, re-test the page->mapping flags and the page ref counter
+ *      isolation, re-test the PageBalloon mark and the PagePrivate flag
  *      under the proper page lock, to ensure isolating a valid balloon page
  *      (not yet isolated, nor under release procedure)
  *
+ * iv. isolation or dequeueing procedure must clear PagePrivate flag under
+ *     page lock together with removing page from balloon device page list.
+ *
  * The functions provided by this interface are placed to help on coping with
  * the aforementioned balloon page corner case, as well as to ensure the simple
  * set of exposed rules are satisfied while we are dealing with balloon pages
@@ -71,28 +74,6 @@ static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
 	kfree(b_dev_info);
 }

-/*
- * balloon_page_free - release a balloon page back to the page free lists
- * @page: ballooned page to be set free
- *
- * This function must be used to properly set free an isolated/dequeued balloon
- * page at the end of a sucessful page migration, or at the balloon driver's
- * page release procedure.
- */
-static inline void balloon_page_free(struct page *page)
-{
-	/*
-	 * Balloon pages always get an extra refcount before being isolated
-	 * and before being dequeued to help on sorting out fortuite colisions
-	 * between a thread attempting to isolate and another thread attempting
-	 * to release the very same balloon page.
-	 *
-	 * Before we handle the page back to Buddy, lets drop its extra refcnt.
-	 */
-	put_page(page);
-	__free_page(page);
-}
-
 #ifdef CONFIG_BALLOON_COMPACTION
 extern bool balloon_page_isolate(struct page *page);
 extern void balloon_page_putback(struct page *page);
@@ -108,74 +89,33 @@ static inline void balloon_mapping_free(struct address_space *balloon_mapping)
 }

 /*
- * page_flags_cleared - helper to perform balloon @page ->flags tests.
- *
- * As balloon pages are obtained from buddy and we do not play with page->flags
- * at driver level (exception made when we get the page lock for compaction),
- * we can safely identify a ballooned page by checking if the
- * PAGE_FLAGS_CHECK_AT_PREP page->flags are all cleared. This approach also
- * helps us skip ballooned pages that are locked for compaction or release, thus
- * mitigating their racy check at balloon_page_movable()
- */
-static inline bool page_flags_cleared(struct page *page)
-{
-	return !(page->flags & PAGE_FLAGS_CHECK_AT_PREP);
-}
-
-/*
- * __is_movable_balloon_page - helper to perform @page mapping->flags tests
+ * __is_movable_balloon_page - helper to perform @page PageBalloon tests
  */
 static inline bool __is_movable_balloon_page(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	return mapping_balloon(mapping);
+	return PageBalloon(page);
 }

 /*
- * balloon_page_movable - test page->mapping->flags to identify balloon pages
- *			  that can be moved by compaction/migration.
- *
- * This function is used at core compaction's page isolation scheme, therefore
- * most pages exposed to it are not enlisted as balloon pages and so, to avoid
- * undesired side effects like racing against __free_pages(), we cannot afford
- * holding the page locked while testing page->mapping->flags here.
+ * balloon_page_movable - test PageBalloon to identify balloon pages
+ *			  and PagePrivate to check that the page is not
+ *			  isolated and can be moved by compaction/migration.
  *
  * As we might return false positives in the case of a balloon page being just
- * released under us, the page->mapping->flags need to be re-tested later,
- * under the proper page lock, at the functions that will be coping with the
- * balloon page case.
+ * released under us, this need to be re-tested later, under the page lock.
  */
 static inline bool balloon_page_movable(struct page *page)
 {
-	/*
-	 * Before dereferencing and testing mapping->flags, let's make sure
-	 * this is not a page that uses ->mapping in a different way
-	 */
-	if (page_flags_cleared(page) && !page_mapped(page) &&
-	    page_count(page) == 1)
-		return __is_movable_balloon_page(page);
-
-	return false;
+	return PageBalloon(page) && PagePrivate(page);
 }

 /*
  * isolated_balloon_page - identify an isolated balloon page on private
  *			   compaction/migration page lists.
- *
- * After a compaction thread isolates a balloon page for migration, it raises
- * the page refcount to prevent concurrent compaction threads from re-isolating
- * the same page. For that reason putback_movable_pages(), or other routines
- * that need to identify isolated balloon pages on private pagelists, cannot
- * rely on balloon_page_movable() to accomplish the task.
  */
 static inline bool isolated_balloon_page(struct page *page)
 {
-	/* Already isolated balloon pages, by default, have a raised refcount */
-	if (page_flags_cleared(page) && !page_mapped(page) &&
-	    page_count(page) >= 2)
-		return __is_movable_balloon_page(page);
-
-	return false;
+	return PageBalloon(page);
 }

 /*
@@ -192,6 +132,8 @@ static inline void balloon_page_insert(struct page *page,
 					struct address_space *mapping,
 					struct list_head *head)
 {
+	__SetPageBalloon(page);
+	SetPagePrivate(page);
 	page->mapping = mapping;
 	list_add(&page->lru, head);
 }
@@ -206,8 +148,12 @@ static inline void balloon_page_insert(struct page *page,
  */
 static inline void balloon_page_delete(struct page *page)
 {
+	__ClearPageBalloon(page);
 	page->mapping = NULL;
-	list_del(&page->lru);
+	if (PagePrivate(page)) {
+		ClearPagePrivate(page);
+		list_del(&page->lru);
+	}
 }

 /*
@@ -258,6 +204,11 @@ static inline void balloon_page_delete(struct page *page)
 	list_del(&page->lru);
 }

+static inline bool __is_movable_balloon_page(struct page *page)
+{
+	return false;
+}
+
 static inline bool balloon_page_movable(struct page *page)
 {
 	return false;
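
Rules iii and iv from the header comment above translate into the following flow on the compaction side. A minimal sketch condensed from balloon_page_movable() and balloon_page_isolate() as changed by this patch; the helper name is hypothetical:

	/* Hypothetical condensation of the isolation protocol -- not patch code. */
	static bool try_isolate_balloon_page(struct page *page)
	{
		bool isolated = false;

		/* Lockless hint; may race with deflation, so only a hint. */
		if (!balloon_page_movable(page))	/* PageBalloon() && PagePrivate() */
			return false;

		if (!trylock_page(page))
			return false;

		/* Rule iii: re-test both marks under the page lock. */
		if (balloon_page_movable(page)) {
			__isolate_balloon_page(page);	/* rule iv: clears PagePrivate */
			isolated = true;
		}
		unlock_page(page);
		return isolated;
	}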

include/linux/migrate.h

+1 −10
@@ -13,18 +13,9 @@ typedef void free_page_t(struct page *page, unsigned long private);
  * Return values from addresss_space_operations.migratepage():
  * - negative errno on page migration failure;
  * - zero on page migration success;
- *
- * The balloon page migration introduces this special case where a 'distinct'
- * return code is used to flag a successful page migration to unmap_and_move().
- * This approach is necessary because page migration can race against balloon
- * deflation procedure, and for such case we could introduce a nasty page leak
- * if a successfully migrated balloon page gets released concurrently with
- * migration's unmap_and_move() wrap-up steps.
  */
 #define MIGRATEPAGE_SUCCESS		0
-#define MIGRATEPAGE_BALLOON_SUCCESS	1 /* special ret code for balloon page
-					   * sucessful migration case.
-					   */
+
 enum migrate_reason {
 	MR_COMPACTION,
 	MR_MEMORY_FAILURE,

include/linux/mm.h

+19 −0
@@ -553,6 +553,25 @@ static inline void __ClearPageBuddy(struct page *page)
 	atomic_set(&page->_mapcount, -1);
 }

+#define PAGE_BALLOON_MAPCOUNT_VALUE (-256)
+
+static inline int PageBalloon(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == PAGE_BALLOON_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageBalloon(struct page *page)
+{
+	VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
+	atomic_set(&page->_mapcount, PAGE_BALLOON_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageBalloon(struct page *page)
+{
+	VM_BUG_ON_PAGE(!PageBalloon(page), page);
+	atomic_set(&page->_mapcount, -1);
+}
+
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
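
The -256 value lines up with the buddy-allocator convention the commit message mentions (PAGE_BUDDY_MAPCOUNT_VALUE = -128). A hypothetical helper, for illustration only, showing how _mapcount doubles as a page-type mark:

	/*
	 * Illustration only (not part of this patch): _mapcount sentinels.
	 *   -1    default for an unmapped page
	 *   -128  PAGE_BUDDY_MAPCOUNT_VALUE   -- page owned by the buddy allocator
	 *   -256  PAGE_BALLOON_MAPCOUNT_VALUE -- page owned by a balloon device
	 */
	static inline bool page_has_type_mark(struct page *page)
	{
		int mapcount = atomic_read(&page->_mapcount);

		return mapcount == PAGE_BUDDY_MAPCOUNT_VALUE ||
		       mapcount == PAGE_BALLOON_MAPCOUNT_VALUE;
	}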

mm/balloon_compaction.c

+12 −14
@@ -93,17 +93,12 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
 		 * to be released by the balloon driver.
 		 */
 		if (trylock_page(page)) {
+			if (!PagePrivate(page)) {
+				/* raced with isolation */
+				unlock_page(page);
+				continue;
+			}
 			spin_lock_irqsave(&b_dev_info->pages_lock, flags);
-			/*
-			 * Raise the page refcount here to prevent any wrong
-			 * attempt to isolate this page, in case of coliding
-			 * with balloon_page_isolate() just after we release
-			 * the page lock.
-			 *
-			 * balloon_page_free() will take care of dropping
-			 * this extra refcount later.
-			 */
-			get_page(page);
 			balloon_page_delete(page);
 			spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 			unlock_page(page);
@@ -187,7 +182,9 @@ static inline void __isolate_balloon_page(struct page *page)
 {
 	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
 	unsigned long flags;
+
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	ClearPagePrivate(page);
 	list_del(&page->lru);
 	b_dev_info->isolated_pages++;
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
@@ -197,7 +194,9 @@ static inline void __putback_balloon_page(struct page *page)
 {
 	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
 	unsigned long flags;
+
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	SetPagePrivate(page);
 	list_add(&page->lru, &b_dev_info->pages);
 	b_dev_info->isolated_pages--;
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
@@ -235,12 +234,11 @@ bool balloon_page_isolate(struct page *page)
 	 */
 	if (likely(trylock_page(page))) {
 		/*
-		 * A ballooned page, by default, has just one refcount.
+		 * A ballooned page, by default, has PagePrivate set.
 		 * Prevent concurrent compaction threads from isolating
-		 * an already isolated balloon page by refcount check.
+		 * an already isolated balloon page by clearing it.
 		 */
-		if (__is_movable_balloon_page(page) &&
-		    page_count(page) == 2) {
+		if (balloon_page_movable(page)) {
 			__isolate_balloon_page(page);
 			unlock_page(page);
 			return true;
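
With PagePrivate as the single arbiter under the page lock, the dequeue/isolate race described in the commit message resolves cleanly. A condensed restatement (not patch code):

	/*
	 * Dequeue vs. isolate, restated (not patch code):
	 *
	 *   balloon_page_dequeue()           balloon_page_isolate()
	 *   ----------------------           ----------------------
	 *   picks page from device list
	 *                                    trylock_page(page)  <- wins the lock
	 *                                    PagePrivate set -> ClearPagePrivate(),
	 *                                    list_del(), unlock_page(page)
	 *   trylock_page(page)
	 *   PagePrivate clear -> raced with
	 *   isolation; unlock and move on
	 */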

mm/compaction.c

+1 −1
@@ -597,7 +597,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		 */
 		if (!PageLRU(page)) {
 			if (unlikely(balloon_page_movable(page))) {
-				if (locked && balloon_page_isolate(page)) {
+				if (balloon_page_isolate(page)) {
 					/* Successfully isolated */
 					goto isolate_success;
 				}

mm/migrate.c

+4 −12
@@ -876,7 +876,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		}
 	}

-	if (unlikely(balloon_page_movable(page))) {
+	if (unlikely(isolated_balloon_page(page))) {
 		/*
 		 * A ballooned page does not need any special attention from
 		 * physical to virtual reverse mapping procedures.
@@ -955,17 +955,6 @@ static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,

 	rc = __unmap_and_move(page, newpage, force, mode);

-	if (unlikely(rc == MIGRATEPAGE_BALLOON_SUCCESS)) {
-		/*
-		 * A ballooned page has been migrated already.
-		 * Now, it's the time to wrap-up counters,
-		 * handle the page back to Buddy and return.
-		 */
-		dec_zone_page_state(page, NR_ISOLATED_ANON +
-				    page_is_file_cache(page));
-		balloon_page_free(page);
-		return MIGRATEPAGE_SUCCESS;
-	}
 out:
 	if (rc != -EAGAIN) {
 		/*
@@ -988,6 +977,9 @@ static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
 	if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
 		ClearPageSwapBacked(newpage);
 		put_new_page(newpage, private);
+	} else if (unlikely(__is_movable_balloon_page(newpage))) {
+		/* drop our reference, page already in the balloon */
+		put_page(newpage);
 	} else
 		putback_lru_page(newpage);
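Finally, the balloon reference handoff across the hunks above, as a summary of the get_page()/put_page() pairs rather than new patch code:

	/*
	 * Balloon reference flow after this patch (summary, not patch code):
	 *
	 *   virtballoon_migratepage():  get_page(newpage)  balloon takes its own ref
	 *                               put_page(page)     old page's balloon ref dropped
	 *   unmap_and_move():           put_page(newpage)  migration's ref dropped, since
	 *                               newpage already sits on the balloon page list
	 *   release_pages_by_pfn():     put_page(page)     balloon ref dropped on deflate
	 */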