Commit c64ba7f

Author: Fox Snowpatch, committed
1 parent 7b23713 commit c64ba7f

File tree: 9 files changed, +236 −38 lines changed

arch/powerpc/include/asm/book3s/64/pgtable.h (+3)

@@ -1124,6 +1124,7 @@ extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
 extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
 extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
 extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern pud_t pud_modify(pud_t pud, pgprot_t newprot);
 extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                        pmd_t *pmdp, pmd_t pmd);
 extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
@@ -1384,6 +1385,8 @@ static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
 #define __HAVE_ARCH_PMDP_INVALIDATE
 extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
                              pmd_t *pmdp);
+extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+                             pud_t *pudp);
 
 #define pmd_move_must_withdraw pmd_move_must_withdraw
 struct spinlock;

arch/powerpc/mm/book3s64/pgtable.c (+20)

@@ -176,6 +176,17 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
         return __pmd(old_pmd);
 }
 
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+                      pud_t *pudp)
+{
+        unsigned long old_pud;
+
+        VM_WARN_ON_ONCE(!pud_present(*pudp));
+        old_pud = pud_hugepage_update(vma->vm_mm, address, pudp, _PAGE_PRESENT, _PAGE_INVALID);
+        flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+        return __pud(old_pud);
+}
+
 pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
                                    unsigned long addr, pmd_t *pmdp, int full)
 {
@@ -259,6 +270,15 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
         pmdv &= _HPAGE_CHG_MASK;
         return pmd_set_protbits(__pmd(pmdv), newprot);
 }
+
+pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+        unsigned long pudv;
+
+        pudv = pud_val(pud);
+        pudv &= _HPAGE_CHG_MASK;
+        return pud_set_protbits(__pud(pudv), newprot);
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /* For use by kexec, called with MMU off */
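Note: a minimal sketch (not taken from this diff) of how these two helpers are meant to compose; change_huge_pud() in mm/huge_memory.c below performs exactly this sequence:

        /* Change protection on a huge PUD without ever clearing it. */
        pud_t old, entry;

        old = pudp_invalidate(vma, addr, pudp);    /* present -> invalid, TLB flushed */
        entry = pud_modify(old, newprot);          /* pfn and _HPAGE_CHG_MASK bits kept */
        set_pud_at(vma->vm_mm, addr, pudp, entry); /* publish the updated leaf */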

arch/x86/include/asm/pgtable.h (+60 −10)

@@ -174,6 +174,13 @@ static inline int pud_young(pud_t pud)
         return pud_flags(pud) & _PAGE_ACCESSED;
 }
 
+static inline bool pud_shstk(pud_t pud)
+{
+        return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+               (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+               (_PAGE_DIRTY | _PAGE_PSE);
+}
+
 static inline int pte_write(pte_t pte)
 {
         /*
@@ -780,6 +787,12 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
                        __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
 }
 
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+        return pfn_pud(pud_pfn(pud),
+                       __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
 static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -827,14 +840,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
         pmd_result = __pmd(val);
 
         /*
-         * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid:
-         * 1. Marking Write=0 PMDs Dirty=1
-         * 2. Marking Dirty=1 PMDs Write=0
-         *
-         * The first case cannot happen because the _PAGE_CHG_MASK will filter
-         * out any Dirty bit passed in newprot. Handle the second case by
-         * going through the mksaveddirty exercise. Only do this if the old
-         * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+         * Avoid creating shadow stack PMD by accident. See comment in
+         * pte_modify().
          */
         if (oldval & _PAGE_RW)
                 pmd_result = pmd_mksaveddirty(pmd_result);
@@ -844,6 +851,29 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
         return pmd_result;
 }
 
+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+        pudval_t val = pud_val(pud), oldval = val;
+        pud_t pud_result;
+
+        val &= _HPAGE_CHG_MASK;
+        val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+        val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK);
+
+        pud_result = __pud(val);
+
+        /*
+         * Avoid creating shadow stack PUD by accident. See comment in
+         * pte_modify().
+         */
+        if (oldval & _PAGE_RW)
+                pud_result = pud_mksaveddirty(pud_result);
+        else
+                pud_result = pud_clear_saveddirty(pud_result);
+
+        return pud_result;
+}
+
 /*
  * mprotect needs to preserve PAT and encryption bits when updating
  * vm_page_prot
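Note: the shortened comment is easier to follow with the bit patterns spelled out. An illustrative walk-through of the hazard the saved-dirty step avoids (annotation, not commit text):

        /*
         * On x86 a shadow-stack leaf is encoded as Write=0,Dirty=1, so
         * dropping _PAGE_RW from an ordinary dirty PUD without fixup
         * would fabricate exactly that encoding:
         *
         *   start:                RW=1, DIRTY=1        (writable PUD)
         *   after mprotect(PROT_READ):
         *     naive:              RW=0, DIRTY=1        (= shadow stack!)
         *     pud_mksaveddirty(): RW=0, SAVED_DIRTY=1  (safe)
         */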
@@ -1078,8 +1108,7 @@ static inline pmd_t *pud_pgtable(pud_t pud)
 #define pud_leaf pud_leaf
 static inline bool pud_leaf(pud_t pud)
 {
-        return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
-               (_PAGE_PSE | _PAGE_PRESENT);
+        return pud_val(pud) & _PAGE_PSE;
 }
 
 static inline int pud_bad(pud_t pud)
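Note on dropping the _PAGE_PRESENT half of the test (rationale inferred from the rest of this series, not commit text):

        /*
         * pudp_invalidate() clears _PAGE_PRESENT but leaves _PAGE_PSE
         * set, and PROT_NONE entries likewise lack the present bit;
         * both must still be recognized as leaves, e.g. by
         * __pud_trans_huge_lock(). Testing PSE alone matches what
         * pmd_leaf() does.
         */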
@@ -1383,10 +1412,28 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 }
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+                unsigned long address, pud_t *pudp, pud_t pud)
+{
+        page_table_check_pud_set(vma->vm_mm, pudp, pud);
+        if (IS_ENABLED(CONFIG_SMP)) {
+                return xchg(pudp, pud);
+        } else {
+                pud_t old = *pudp;
+                WRITE_ONCE(*pudp, pud);
+                return old;
+        }
+}
+#endif
+
 #define __HAVE_ARCH_PMDP_INVALIDATE_AD
 extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
                                 unsigned long address, pmd_t *pmdp);
 
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+                      pud_t *pudp);
+
 /*
  * Page table pages are page-aligned. The lower half of the top
  * level is used for userspace and the top half for the kernel.
@@ -1668,6 +1715,9 @@ void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
 #define arch_check_zapped_pmd arch_check_zapped_pmd
 void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
 
+#define arch_check_zapped_pud arch_check_zapped_pud
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud);
+
 #ifdef CONFIG_XEN_PV
 #define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
 static inline bool arch_has_hw_nonleaf_pmd_young(void)
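Note on the SMP branch of pudp_establish(), mirroring the pmdp_establish() pattern directly above it:

        /*
         * xchg() reads the old entry and installs the new one in a
         * single atomic operation, so a concurrent hardware
         * Accessed/Dirty bit update on another CPU cannot be lost
         * between the read and the write. On !SMP no other CPU can
         * touch the entry, so a plain read plus WRITE_ONCE() suffices.
         */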

arch/x86/mm/pgtable.c (+18)

@@ -641,6 +641,18 @@ pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
 }
 #endif
 
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+        defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+                      pud_t *pudp)
+{
+        VM_WARN_ON_ONCE(!pud_present(*pudp));
+        pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+        flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+        return old;
+}
+#endif
+
 /**
  * reserve_top_address - reserves a hole in the top of kernel address space
  * @reserve - size of hole to reserve
@@ -926,3 +938,9 @@ void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd)
         VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) &&
                         pmd_shstk(pmd));
 }
+
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
+{
+        /* See note in arch_check_zapped_pte() */
+        VM_WARN_ON_ONCE(!(vma->vm_flags & VM_SHADOW_STACK) && pud_shstk(pud));
+}
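Note on what the new warning catches (per the arch_check_zapped_pte() comment it defers to):

        /*
         * Shadow-stack encodings must only appear inside VM_SHADOW_STACK
         * VMAs; finding pud_shstk() true while zapping any other VMA
         * points at corrupted page tables, hence the warn-once.
         */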

drivers/dax/device.c (+3 −3)

@@ -235,9 +235,9 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
         int id;
         struct dev_dax *dev_dax = filp->private_data;
 
-        dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) order:%d\n", current->comm,
-                (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
-                vmf->vma->vm_start, vmf->vma->vm_end, order);
+        dev_dbg(&dev_dax->dev, "%s: op=%s addr=%#lx order=%d\n", current->comm,
+                (vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
+                vmf->address & ~((1UL << (order + PAGE_SHIFT)) - 1), order);
 
         id = dax_read_lock();
         if (order == 0)
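Note: the new addr value is the fault address rounded down to the order-sized boundary. A worked example, assuming PAGE_SHIFT = 12 (annotation only):

        /*
         * order = 9  (PMD): mask = ~((1UL << 21) - 1) -> 2 MiB-aligned addr
         * order = 18 (PUD): mask = ~((1UL << 30) - 1) -> 1 GiB-aligned addr
         *
         * This pinpoints the faulting huge page; the old message printed
         * only the surrounding VMA's [vm_start, vm_end) range.
         */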

include/linux/huge_mm.h (+24)

@@ -342,6 +342,17 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
                 unsigned long address);
 
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
+                    pud_t *pudp, unsigned long addr, pgprot_t newprot,
+                    unsigned long cp_flags);
+#else
+static inline int
+change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
+                pud_t *pudp, unsigned long addr, pgprot_t newprot,
+                unsigned long cp_flags) { return 0; }
+#endif
+
 #define split_huge_pud(__vma, __pud, __address)                         \
         do {                                                            \
                 pud_t *____pud = (__pud);                               \
@@ -585,6 +596,19 @@ static inline int next_order(unsigned long *orders, int prev)
 {
         return 0;
 }
+
+static inline void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+                                    unsigned long address)
+{
+}
+
+static inline int change_huge_pud(struct mmu_gather *tlb,
+                                  struct vm_area_struct *vma, pud_t *pudp,
+                                  unsigned long addr, pgprot_t newprot,
+                                  unsigned long cp_flags)
+{
+        return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline int split_folio_to_list_to_order(struct folio *folio,
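Note: how the three change_huge_pud() definitions divide the config space (summary, not commit text):

        /*
         * THP=y, HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y:
         *         out-of-line implementation in mm/huge_memory.c
         * THP=y, HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=n:
         *         inline stub above, returns 0
         * THP=n:
         *         inline stubs in the !CONFIG_TRANSPARENT_HUGEPAGE block
         */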

include/linux/pgtable.h (+6)

@@ -447,6 +447,12 @@ static inline void arch_check_zapped_pmd(struct vm_area_struct *vma,
 }
 #endif
 
+#ifndef arch_check_zapped_pud
+static inline void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud)
+{
+}
+#endif
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
                                        unsigned long address,

mm/huge_memory.c (+55 −1)

@@ -2119,6 +2119,53 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
         return ret;
 }
 
+/*
+ * Returns:
+ *
+ * - 0: if pud leaf changed from under us
+ * - 1: if pud can be skipped
+ * - HPAGE_PUD_NR: if pud was successfully processed
+ */
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
+                    pud_t *pudp, unsigned long addr, pgprot_t newprot,
+                    unsigned long cp_flags)
+{
+        struct mm_struct *mm = vma->vm_mm;
+        pud_t oldpud, entry;
+        spinlock_t *ptl;
+
+        tlb_change_page_size(tlb, HPAGE_PUD_SIZE);
+
+        /* NUMA balancing doesn't apply to dax */
+        if (cp_flags & MM_CP_PROT_NUMA)
+                return 1;
+
+        /*
+         * Huge entries on userfault-wp only works with anonymous, while we
+         * don't have anonymous PUDs yet.
+         */
+        if (WARN_ON_ONCE(cp_flags & MM_CP_UFFD_WP_ALL))
+                return 1;
+
+        ptl = __pud_trans_huge_lock(pudp, vma);
+        if (!ptl)
+                return 0;
+
+        /*
+         * Can't clear PUD or it can race with concurrent zapping. See
+         * change_huge_pmd().
+         */
+        oldpud = pudp_invalidate(vma, addr, pudp);
+        entry = pud_modify(oldpud, newprot);
+        set_pud_at(mm, addr, pudp, entry);
+        tlb_flush_pud_range(tlb, addr, HPAGE_PUD_SIZE);
+
+        spin_unlock(ptl);
+        return HPAGE_PUD_NR;
+}
+#endif
+
 #ifdef CONFIG_USERFAULTFD
 /*
  * The PT lock for src_pmd and dst_vma/src_vma (for reading) are locked by
@@ -2298,12 +2345,14 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 pud_t *pud, unsigned long addr)
 {
         spinlock_t *ptl;
+        pud_t orig_pud;
 
         ptl = __pud_trans_huge_lock(pud, vma);
         if (!ptl)
                 return 0;
 
-        pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm);
+        orig_pud = pudp_huge_get_and_clear_full(vma, addr, pud, tlb->fullmm);
+        arch_check_zapped_pud(vma, orig_pud);
         tlb_remove_pud_tlb_entry(tlb, pud, addr);
         if (vma_is_special_huge(vma)) {
                 spin_unlock(ptl);
@@ -2347,6 +2396,11 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
         spin_unlock(ptl);
         mmu_notifier_invalidate_range_end(&range);
 }
+#else
+void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
+                      unsigned long address)
+{
+}
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
 static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
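Note: the mprotect-side caller that consumes change_huge_pud()'s return values lives in a file not shown on this page, so the snippet below is only a hypothetical sketch of the walker shape those values imply:

        if (pud_leaf(*pudp)) {
                int ret = change_huge_pud(tlb, vma, pudp, addr,
                                          newprot, cp_flags);
                if (ret == 0)                   /* leaf changed under us */
                        goto retry;
                if (ret == HPAGE_PUD_NR) {      /* whole PUD handled */
                        pages += HPAGE_PUD_NR;
                        continue;
                }
                /* ret == 1: this PUD can be skipped */
        }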
