
Commit f45ec5f

xzpeter authored and torvalds committed
userfaultfd: wp: support swap and page migration
For both swap and page migration, we use bit 2 of the entry to identify whether the entry is uffd write-protected. It plays a role similar to the existing soft-dirty bit in swap entries, but only tracks the uffd-wp state of a specific PTE/PMD.

One special case: when we recover the uffd-wp bit from a swap/migration entry back into the PTE bit, we must also clear the _PAGE_RW bit; otherwise, even with _PAGE_UFFD_WP set, the write cannot be trapped at all.

Previously, change_pte_range() did nothing for uffd when the PTE was a swap entry. That could lead to a data mismatch if the page being write-protected was swapped out while UFFDIO_WRITEPROTECT was being sent. This patch therefore applies/removes the uffd-wp bit even for swap entries.

Signed-off-by: Peter Xu <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Bobby Powers <[email protected]>
Cc: Brian Geffon <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Denis Plotnikov <[email protected]>
Cc: "Dr. David Alan Gilbert" <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Jerome Glisse <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: Martin Cracauer <[email protected]>
Cc: Marty McFadden <[email protected]>
Cc: Maya Gokhale <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Shaohua Li <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 2e3d5dc commit f45ec5f
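For context, here is a minimal userspace sketch of the UFFDIO_WRITEPROTECT path this fix hardens. It assumes a kernel carrying this write-protect series; error handling is abbreviated and the program does nothing useful beyond arming the protection:

/*
 * Hedged sketch, not part of the patch: register a page for uffd-wp
 * and write-protect it.  Requires a kernel with userfaultfd
 * write-protect support (the series this commit belongs to).
 */
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0) { perror("userfaultfd"); return 1; }

	struct uffdio_api api = { .api = UFFD_API };
	if (ioctl(uffd, UFFDIO_API, &api)) { perror("UFFDIO_API"); return 1; }

	char *mem = mmap(NULL, page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) { perror("mmap"); return 1; }
	mem[0] = 1;	/* fault the page in first */

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)mem, .len = page },
		.mode = UFFDIO_REGISTER_MODE_WP,
	};
	if (ioctl(uffd, UFFDIO_REGISTER, &reg)) { perror("UFFDIO_REGISTER"); return 1; }

	struct uffdio_writeprotect wp = {
		.range = { .start = (unsigned long)mem, .len = page },
		.mode = UFFDIO_WRITEPROTECT_MODE_WP,
	};
	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) { perror("UFFDIO_WRITEPROTECT"); return 1; }

	printf("range write-protected; writes will now raise uffd-wp events\n");
	return 0;
}

Before this commit, if the target page had been swapped out (or was under migration) when the UFFDIO_WRITEPROTECT ioctl ran, the wp bit was dropped along with the present PTE and later writes went untrapped; with it, the bit survives in the swap/migration entry.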

6 files changed: 42 additions, 11 deletions

include/linux/swapops.h

Lines changed: 2 additions & 0 deletions

@@ -68,6 +68,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 
 	if (pte_swp_soft_dirty(pte))
 		pte = pte_swp_clear_soft_dirty(pte);
+	if (pte_swp_uffd_wp(pte))
+		pte = pte_swp_clear_uffd_wp(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
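Why the new hunk clears the bit before conversion: __pte_to_swp_entry() reinterprets every bit still set in the PTE as part of the swap type/offset encoding, so a leftover software bit would corrupt the decoded entry. A toy model of the failure, with an illustrative bit layout rather than any real architecture's:

#include <assert.h>
#include <stdint.h>

/* illustrative: bit 2 doubles as the swap-PTE uffd-wp marker */
#define PAGE_SWP_UFFD_WP	(1ull << 2)

/* toy swap layout: type in bits 0-1, offset in bits 2 and up */
static uint64_t swp_offset(uint64_t arch_entry) { return arch_entry >> 2; }
static uint64_t swp_type(uint64_t arch_entry)   { return arch_entry & 3; }

int main(void)
{
	uint64_t pte = (0x1234ull << 2) | 1;	/* offset 0x1234, type 1 */
	uint64_t marked = pte | PAGE_SWP_UFFD_WP; /* software bit still set */

	/* decoding without clearing corrupts the offset... */
	assert(swp_offset(marked) != 0x1234);

	/* ...so pte_to_swp_entry() strips it first, as the hunk above adds */
	marked &= ~PAGE_SWP_UFFD_WP;	/* models pte_swp_clear_uffd_wp() */
	assert(swp_offset(marked) == 0x1234 && swp_type(marked) == 1);
	return 0;
}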

mm/huge_memory.c

Lines changed: 3 additions & 0 deletions

@@ -2297,6 +2297,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		write = is_write_migration_entry(entry);
 		young = false;
 		soft_dirty = pmd_swp_soft_dirty(old_pmd);
+		uffd_wp = pmd_swp_uffd_wp(old_pmd);
 	} else {
 		page = pmd_page(old_pmd);
 		if (pmd_dirty(old_pmd))
@@ -2329,6 +2330,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = swp_entry_to_pte(swp_entry);
 			if (soft_dirty)
 				entry = pte_swp_mksoft_dirty(entry);
+			if (uffd_wp)
+				entry = pte_swp_mkuffd_wp(entry);
 		} else {
 			entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
 			entry = maybe_mkwrite(entry, vma);

mm/memory.c

Lines changed: 8 additions & 0 deletions

@@ -733,6 +733,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 				pte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(*src_pte))
 					pte = pte_swp_mksoft_dirty(pte);
+				if (pte_swp_uffd_wp(*src_pte))
+					pte = pte_swp_mkuffd_wp(pte);
 				set_pte_at(src_mm, addr, src_pte, pte);
 			}
 		} else if (is_device_private_entry(entry)) {
@@ -762,6 +764,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			    is_cow_mapping(vm_flags)) {
 				make_device_private_entry_read(&entry);
 				pte = swp_entry_to_pte(entry);
+				if (pte_swp_uffd_wp(*src_pte))
+					pte = pte_swp_mkuffd_wp(pte);
 				set_pte_at(src_mm, addr, src_pte, pte);
 			}
 		}
@@ -3098,6 +3102,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	flush_icache_page(vma, page);
 	if (pte_swp_soft_dirty(vmf->orig_pte))
 		pte = pte_mksoft_dirty(pte);
+	if (pte_swp_uffd_wp(vmf->orig_pte)) {
+		pte = pte_mkuffd_wp(pte);
+		pte = pte_wrprotect(pte);
+	}
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
 	arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
 	vmf->orig_pte = pte;
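The pte_wrprotect() in the do_swap_page() hunk is the detail the commit message calls out: _PAGE_UFFD_WP on its own does not make the hardware fault, so a PTE that stays writable would never reach the uffd-wp handler. A toy model with illustrative flag bits (not real kernel values):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PAGE_RW		(1u << 1)	/* illustrative flag positions */
#define PAGE_UFFD_WP	(1u << 2)

/* hardware raises a write fault only when the RW bit is clear */
static bool write_faults(uint32_t pte) { return !(pte & PAGE_RW); }

int main(void)
{
	uint32_t pte = PAGE_RW;		/* freshly mapped, writable */

	pte |= PAGE_UFFD_WP;		/* mark uffd-wp ... */
	assert(!write_faults(pte));	/* ... but RW still lets writes through */

	pte &= ~PAGE_RW;		/* the pte_wrprotect() in the hunk above */
	assert(write_faults(pte));	/* now the write traps and uffd-wp is seen */
	return 0;
}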

mm/migrate.c

Lines changed: 6 additions & 0 deletions

@@ -243,11 +243,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 		entry = pte_to_swp_entry(*pvmw.pte);
 		if (is_write_migration_entry(entry))
 			pte = maybe_mkwrite(pte, vma);
+		else if (pte_swp_uffd_wp(*pvmw.pte))
+			pte = pte_mkuffd_wp(pte);
 
 		if (unlikely(is_zone_device_page(new))) {
 			if (is_device_private_page(new)) {
 				entry = make_device_private_entry(new, pte_write(pte));
 				pte = swp_entry_to_pte(entry);
+				if (pte_swp_uffd_wp(*pvmw.pte))
+					pte = pte_mkuffd_wp(pte);
 			}
 		}
 
@@ -2338,6 +2342,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pte))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pte))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, addr, ptep, swp_pte);
 
 			/*

mm/mprotect.c

Lines changed: 17 additions & 11 deletions

@@ -139,11 +139,11 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			}
 			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
 			pages++;
-		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
+		} else if (is_swap_pte(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
+			pte_t newpte;
 
 			if (is_write_migration_entry(entry)) {
-				pte_t newpte;
 				/*
 				 * A protection check is difficult so
 				 * just be safe and disable write
@@ -152,22 +152,28 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				newpte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(oldpte))
 					newpte = pte_swp_mksoft_dirty(newpte);
-				set_pte_at(vma->vm_mm, addr, pte, newpte);
-
-				pages++;
-			}
-
-			if (is_write_device_private_entry(entry)) {
-				pte_t newpte;
-
+				if (pte_swp_uffd_wp(oldpte))
+					newpte = pte_swp_mkuffd_wp(newpte);
+			} else if (is_write_device_private_entry(entry)) {
 				/*
 				 * We do not preserve soft-dirtiness. See
 				 * copy_one_pte() for explanation.
 				 */
 				make_device_private_entry_read(&entry);
 				newpte = swp_entry_to_pte(entry);
-				set_pte_at(vma->vm_mm, addr, pte, newpte);
+				if (pte_swp_uffd_wp(oldpte))
+					newpte = pte_swp_mkuffd_wp(newpte);
+			} else {
+				newpte = oldpte;
+			}
 
+			if (uffd_wp)
+				newpte = pte_swp_mkuffd_wp(newpte);
+			else if (uffd_wp_resolve)
+				newpte = pte_swp_clear_uffd_wp(newpte);
+
+			if (!pte_same(oldpte, newpte)) {
+				set_pte_at(vma->vm_mm, addr, pte, newpte);
 				pages++;
 			}
 		}

mm/rmap.c

Lines changed: 6 additions & 0 deletions

@@ -1502,6 +1502,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
 			/*
 			 * No need to invalidate here it will synchronize on
@@ -1601,6 +1603,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, address, pvmw.pte, swp_pte);
 			/*
 			 * No need to invalidate here it will synchronize on
@@ -1667,6 +1671,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
+			if (pte_uffd_wp(pteval))
+				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			set_pte_at(mm, address, pvmw.pte, swp_pte);
 			/* Invalidate as we cleared the pte */
 			mmu_notifier_invalidate_range(mm, address,
