@@ -1009,7 +1009,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
 		};
 
 		if (!pte++) {
-			pte = pte_offset_map_nolock(mm, pmd, address, &ptl);
+			/*
+			 * Here the ptl is only used to check pte_same() in
+			 * do_swap_page(), so readonly version is enough.
+			 */
+			pte = pte_offset_map_ro_nolock(mm, pmd, address, &ptl);
 			if (!pte) {
 				mmap_read_unlock(mm);
 				result = SCAN_PMD_NULL;
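
For context on the first hunk: pte_offset_map_ro_nolock() maps the PTE page and reports the covering spinlock through *ptlp, but does not take it. That is enough here because the only later user of this ptl is do_swap_page(), which locks it and revalidates the entry with pte_same(). A minimal sketch of that read-then-revalidate pattern (the helper name is invented; this is not part of the patch):

#include <linux/mm.h>

/*
 * Sketch only: snapshot a PTE locklessly, then take the ptl reported
 * by pte_offset_map_ro_nolock() and confirm the entry is unchanged
 * with pte_same(), the way do_swap_page() uses the ptl passed here.
 */
static bool pte_snapshot_still_valid(struct mm_struct *mm, pmd_t *pmd,
				     unsigned long addr)
{
	spinlock_t *ptl;
	pte_t *pte, entry;
	bool same;

	pte = pte_offset_map_ro_nolock(mm, pmd, addr, &ptl);
	if (!pte)
		return false;		/* no PTE page mapped here */

	entry = ptep_get(pte);		/* lockless snapshot */

	spin_lock(ptl);			/* now lock and revalidate */
	same = pte_same(ptep_get(pte), entry);
	pte_unmap_unlock(pte, ptl);

	return same;
}
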
@@ -1598,14 +1602,17 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 	if (userfaultfd_armed(vma) && !(vma->vm_flags & VM_SHARED))
 		pml = pmd_lock(mm, pmd);
 
-	start_pte = pte_offset_map_nolock(mm, pmd, haddr, &ptl);
+	start_pte = pte_offset_map_rw_nolock(mm, pmd, haddr, &pgt_pmd, &ptl);
 	if (!start_pte)		/* mmap_lock + page lock should prevent this */
 		goto abort;
 	if (!pml)
 		spin_lock(ptl);
 	else if (ptl != pml)
 		spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
 
+	if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd))))
+		goto abort;
+
 	/* step 2: clear page table and adjust rmap */
 	for (i = 0, addr = haddr, pte = start_pte;
 	     i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
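
Unlike the readonly variant, pte_offset_map_rw_nolock() also hands back the pmd value it observed (pgt_pmd here), because a caller that intends to modify the page table must confirm, once it holds the lock, that the PMD still points at the same PTE page: the mapping was taken without the pmd lock, so the table could have been withdrawn in between. A minimal sketch of that write-side contract (invented function name, assuming only the APIs used in this patch):

#include <linux/mm.h>

/*
 * Sketch only: map the PTE page locklessly, record the pmd value,
 * then recheck pmd_same() under the ptl before writing any PTEs.
 */
static int clear_ptes_checked(struct mm_struct *mm, pmd_t *pmd,
			      unsigned long haddr)
{
	pmd_t pgt_pmd;
	spinlock_t *ptl;
	pte_t *start_pte;

	start_pte = pte_offset_map_rw_nolock(mm, pmd, haddr, &pgt_pmd, &ptl);
	if (!start_pte)
		return -EAGAIN;

	spin_lock(ptl);
	/* The PTE page may have been replaced before we got the lock. */
	if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
		pte_unmap_unlock(start_pte, ptl);
		return -EAGAIN;
	}

	/* ... safe to modify PTEs through start_pte here ... */

	pte_unmap_unlock(start_pte, ptl);
	return 0;
}
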
@@ -1651,6 +1658,16 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 	/* step 4: remove empty page table */
 	if (!pml) {
 		pml = pmd_lock(mm, pmd);
+		/*
+		 * We called pte_unmap() and released the ptl before acquiring
+		 * the pml, which means we left the RCU critical section, so the
+		 * PTE page may have been freed, so we must do pmd_same() check
+		 * before reacquiring the ptl.
+		 */
+		if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
+			spin_unlock(pml);
+			goto pmd_change;
+		}
 		if (ptl != pml)
 			spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
 	}
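
The new comment states the lifetime rule this hunk depends on: pte_offset_map_rw_nolock() keeps the PTE page alive only for the RCU read-side critical section, which pte_unmap()/pte_unmap_unlock() ends. After that, a concurrent retract or collapse can clear the PMD and free the PTE page via RCU, so reacquired locks are only trustworthy after a pmd_same() recheck. A hypothetical helper condensing just that drop-and-revalidate step:

#include <linux/mm.h>

/*
 * Sketch only (invented name): after the RCU read section has been
 * left, take the pmd lock and verify the PTE page is still in place
 * before touching it again.
 */
static bool reacquire_pte_table(struct mm_struct *mm, pmd_t *pmd,
				pmd_t pgt_pmd, spinlock_t **pmlp)
{
	spinlock_t *pml = pmd_lock(mm, pmd);

	if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
		spin_unlock(pml);
		return false;	/* PTE page went away in the window */
	}

	*pmlp = pml;
	return true;
}
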
@@ -1682,6 +1699,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 	pte_unmap_unlock(start_pte, ptl);
 	if (pml && pml != ptl)
 		spin_unlock(pml);
+pmd_change:
 	if (notified)
 		mmu_notifier_invalidate_range_end(&range);
 drop_folio:
@@ -1703,6 +1721,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 		spinlock_t *pml;
 		spinlock_t *ptl;
 		bool skipped_uffd = false;
+		pte_t *pte;
 
 		/*
 		 * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
@@ -1738,11 +1757,25 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 					addr, addr + HPAGE_PMD_SIZE);
 		mmu_notifier_invalidate_range_start(&range);
 
+		pte = pte_offset_map_rw_nolock(mm, pmd, addr, &pgt_pmd, &ptl);
+		if (!pte) {
+			mmu_notifier_invalidate_range_end(&range);
+			continue;
+		}
+
 		pml = pmd_lock(mm, pmd);
-		ptl = pte_lockptr(mm, pmd);
 		if (ptl != pml)
 			spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
 
+		if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
+			pte_unmap_unlock(pte, ptl);
+			if (ptl != pml)
+				spin_unlock(pml);
+			mmu_notifier_invalidate_range_end(&range);
+			continue;
+		}
+		pte_unmap(pte);
+
 		/*
 		 * Huge page lock is still held, so normally the page table
 		 * must remain empty; and we have already skipped anon_vma
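
Taken together, the retract_page_tables() hunk shows the full ordering this series relies on: map the PTE page locklessly (recording pgt_pmd), take pml and then ptl (nested, since with split page-table locks they may be distinct), validate with pmd_same(), and only then pte_unmap() while keeping the locks held for the actual PMD clear. A condensed sketch of that sequence (invented function name, not part of the patch):

#include <linux/mm.h>

/*
 * Sketch only: lock ordering is pml -> ptl; the pmd_same() check under
 * both locks is what retroactively legitimizes the lockless mapping.
 */
static bool map_lock_and_validate(struct mm_struct *mm, pmd_t *pmd,
				  unsigned long addr,
				  spinlock_t **pmlp, spinlock_t **ptlp)
{
	pmd_t pgt_pmd;
	spinlock_t *pml, *ptl;
	pte_t *pte;

	pte = pte_offset_map_rw_nolock(mm, pmd, addr, &pgt_pmd, &ptl);
	if (!pte)
		return false;

	pml = pmd_lock(mm, pmd);
	if (ptl != pml)
		spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);

	if (unlikely(!pmd_same(pgt_pmd, pmdp_get_lockless(pmd)))) {
		pte_unmap_unlock(pte, ptl);
		if (ptl != pml)
			spin_unlock(pml);
		return false;
	}
	pte_unmap(pte);		/* mapping validated; locks stay held */

	*pmlp = pml;
	*ptlp = ptl;
	return true;
}

On configurations without split PTE locks, ptl == pml and only one lock exists, which is why both the nested lock and the error-path unlock are guarded by ptl != pml.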