@@ -580,8 +580,9 @@ static bool move_pgt_entry(enum pgt_entry entry, struct vm_area_struct *vma,
  * the VMA that is created to span the source and destination of the move,
  * so we make an exception for it.
  */
-static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_align,
-                           unsigned long mask, bool for_stack)
+static bool can_align_down(struct pagetable_move_control *pmc,
+                           struct vm_area_struct *vma, unsigned long addr_to_align,
+                           unsigned long mask)
 {
         unsigned long addr_masked = addr_to_align & mask;
 
@@ -590,11 +591,11 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali
          * of the corresponding VMA, we can't align down or we will destroy part
          * of the current mapping.
          */
-        if (!for_stack && vma->vm_start != addr_to_align)
+        if (!pmc->for_stack && vma->vm_start != addr_to_align)
                 return false;
 
         /* In the stack case we explicitly permit in-VMA alignment. */
-        if (for_stack && addr_masked >= vma->vm_start)
+        if (pmc->for_stack && addr_masked >= vma->vm_start)
                 return true;
 
         /*
@@ -604,54 +605,131 @@ static bool can_align_down(struct vm_area_struct *vma, unsigned long addr_to_ali
         return find_vma_intersection(vma->vm_mm, addr_masked, vma->vm_start) == NULL;
 }
 
-/* Opportunistically realign to specified boundary for faster copy. */
-static void try_realign_addr(unsigned long *old_addr, struct vm_area_struct *old_vma,
-                             unsigned long *new_addr, struct vm_area_struct *new_vma,
-                             unsigned long mask, bool for_stack)
+/*
+ * Determine if we are in fact able to realign for efficiency to a higher page
+ * table boundary.
+ */
+static bool can_realign_addr(struct pagetable_move_control *pmc,
+                             unsigned long pagetable_mask)
 {
+        unsigned long align_mask = ~pagetable_mask;
+        unsigned long old_align = pmc->old_addr & align_mask;
+        unsigned long new_align = pmc->new_addr & align_mask;
+        unsigned long pagetable_size = align_mask + 1;
+        unsigned long old_align_next = pagetable_size - old_align;
+
+        /*
+         * We don't want to have to go hunting for VMAs from the end of the old
+         * VMA to the next page table boundary, also we want to make sure the
+         * operation is worthwhile.
+         *
+         * So ensure that we only perform this realignment if the end of the
+         * range being copied reaches or crosses the page table boundary.
+         *
+         * boundary                        boundary
+         *  .<- old_align ->                  .
+         *  .              |----------------.-----------|
+         *  .              | vma            .           |
+         *  .              |----------------.-----------|
+         *  .              <----------------.----------->
+         *  .                          len_in
+         *  <------------------------------->
+         *  .          pagetable_size       .
+         *  .              <---------------->
+         *  .             old_align_next    .
+         */
+        if (pmc->len_in < old_align_next)
+                return false;
+
         /* Skip if the addresses are already aligned. */
-        if ((*old_addr & ~mask) == 0)
-                return;
+        if (old_align == 0)
+                return false;
 
         /* Only realign if the new and old addresses are mutually aligned. */
-        if ((*old_addr & ~mask) != (*new_addr & ~mask))
-                return;
+        if (old_align != new_align)
+                return false;
 
         /* Ensure realignment doesn't cause overlap with existing mappings. */
-        if (!can_align_down(old_vma, *old_addr, mask, for_stack) ||
-            !can_align_down(new_vma, *new_addr, mask, for_stack))
+        if (!can_align_down(pmc, pmc->old, pmc->old_addr, pagetable_mask) ||
+            !can_align_down(pmc, pmc->new, pmc->new_addr, pagetable_mask))
+                return false;
+
+        return true;
+}
+
+/*
+ * Opportunistically realign to specified boundary for faster copy.
+ *
+ * Consider an mremap() of a VMA with page table boundaries as below, and no
+ * preceding VMAs from the lower page table boundary to the start of the VMA,
+ * with the end of the range reaching or crossing the page table boundary.
+ *
+ * boundary                        boundary
+ * .              |----------------.-----------|
+ * .              | vma            .           |
+ * .              |----------------.-----------|
+ * .        pmc->old_addr          .   pmc->old_end
+ * .              <---------------------------->
+ * .                  move these page tables
+ *
+ * If we proceed with moving page tables in this scenario, we will have a lot of
+ * work to do traversing old page tables and establishing new ones in the
+ * destination across multiple lower level page tables.
+ *
+ * The idea here is simply to align pmc->old_addr, pmc->new_addr down to the
+ * page table boundary, so we can simply copy a single page table entry for the
+ * aligned portion of the VMA instead:
+ *
+ * boundary                        boundary
+ * .              |----------------.-----------|
+ * .              | vma            .           |
+ * .              |----------------.-----------|
+ * pmc->old_addr                   .   pmc->old_end
+ * <--------------------------------------------->
+ * .            move these page tables
+ */
+static void try_realign_addr(struct pagetable_move_control *pmc,
+                             unsigned long pagetable_mask)
+{
+
+        if (!can_realign_addr(pmc, pagetable_mask))
                 return;
 
-        *old_addr = *old_addr & mask;
-        *new_addr = *new_addr & mask;
+        /*
+         * Simply align to page table boundaries. Note that we do NOT update the
+         * pmc->old_end value, and since the move_page_tables() operation spans
+         * from [old_addr, old_end) (offsetting new_addr as it is performed),
+         * this simply changes the start of the copy, not the end.
+         */
+        pmc->old_addr &= pagetable_mask;
+        pmc->new_addr &= pagetable_mask;
 }
 
-unsigned long move_page_tables(struct vm_area_struct *vma,
-                unsigned long old_addr, struct vm_area_struct *new_vma,
-                unsigned long new_addr, unsigned long len,
-                bool need_rmap_locks, bool for_stack)
+unsigned long move_page_tables(struct pagetable_move_control *pmc)
 {
         unsigned long extent, old_end;
         struct mmu_notifier_range range;
         pmd_t *old_pmd, *new_pmd;
         pud_t *old_pud, *new_pud;
+        unsigned long old_addr, new_addr;
+        struct vm_area_struct *vma = pmc->old;
 
-        if (!len)
+        if (!pmc->len_in)
                 return 0;
 
-        old_end = old_addr + len;
-
         if (is_vm_hugetlb_page(vma))
-                return move_hugetlb_page_tables(vma, new_vma, old_addr,
-                                                new_addr, len);
+                return move_hugetlb_page_tables(pmc->old, pmc->new, pmc->old_addr,
+                                                pmc->new_addr, pmc->len_in);
 
+        old_end = pmc->old_end;
         /*
          * If possible, realign addresses to PMD boundary for faster copy.
          * Only realign if the mremap copying hits a PMD boundary.
          */
-        if (len >= PMD_SIZE - (old_addr & ~PMD_MASK))
-                try_realign_addr(&old_addr, vma, &new_addr, new_vma, PMD_MASK,
-                                 for_stack);
+        try_realign_addr(pmc, PMD_MASK);
+        /* These may have been changed. */
+        old_addr = pmc->old_addr;
+        new_addr = pmc->new_addr;
 
         flush_cache_range(vma, old_addr, old_end);
         mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
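To make the mask arithmetic in can_realign_addr() above easier to follow, here is a small standalone sketch, not kernel code: it redoes the same computations in userspace with an assumed 2 MiB PMD span (as on x86-64 with 4 KiB pages) and illustrative example addresses.

#include <stdbool.h>
#include <stdio.h>

/* Assumption for the example: a 2 MiB PMD span, standing in for PMD_SIZE/PMD_MASK. */
#define EXAMPLE_PMD_SIZE  (1UL << 21)
#define EXAMPLE_PMD_MASK  (~(EXAMPLE_PMD_SIZE - 1))

int main(void)
{
        /* Example values standing in for pmc->old_addr and pmc->len_in. */
        unsigned long old_addr = 0x7f0000310000UL;
        unsigned long len_in   = 0x200000UL;

        unsigned long align_mask     = ~EXAMPLE_PMD_MASK;           /* offset-within-PMD bits */
        unsigned long old_align      = old_addr & align_mask;       /* offset into the PMD span */
        unsigned long pagetable_size = align_mask + 1;              /* == EXAMPLE_PMD_SIZE */
        unsigned long old_align_next = pagetable_size - old_align;  /* bytes to the next boundary */

        /*
         * Realignment is only worthwhile if the copy reaches or crosses the
         * next PMD boundary, mirroring the len_in < old_align_next check.
         */
        bool crosses = len_in >= old_align_next;

        printf("old_align=%#lx old_align_next=%#lx crosses=%d\n",
               old_align, old_align_next, (int)crosses);
        printf("aligned old_addr=%#lx\n", old_addr & EXAMPLE_PMD_MASK);
        return 0;
}

With these numbers old_align is 0x110000, old_align_next is 0xf0000, and the 2 MiB copy crosses the boundary, so old_addr would be pulled down to 0x7f0000200000, exactly what try_realign_addr() does to pmc->old_addr and pmc->new_addr.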
@@ -675,12 +753,11 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                 if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
                         if (extent == HPAGE_PUD_SIZE) {
                                 move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
-                                               old_pud, new_pud, need_rmap_locks);
+                                               old_pud, new_pud, pmc->need_rmap_locks);
                                 /* We ignore and continue on error? */
                                 continue;
                         }
                 } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PUD) && extent == PUD_SIZE) {
-
                         if (move_pgt_entry(NORMAL_PUD, vma, old_addr, new_addr,
                                            old_pud, new_pud, true))
                                 continue;
@@ -698,7 +775,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                            pmd_devmap(*old_pmd)) {
                         if (extent == HPAGE_PMD_SIZE &&
                             move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
-                                           old_pmd, new_pmd, need_rmap_locks))
+                                           old_pmd, new_pmd, pmc->need_rmap_locks))
                                 continue;
                         split_huge_pmd(vma, old_pmd, old_addr);
                 } else if (IS_ENABLED(CONFIG_HAVE_MOVE_PMD) &&
@@ -713,10 +790,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                 }
                 if (pmd_none(*old_pmd))
                         continue;
-                if (pte_alloc(new_vma->vm_mm, new_pmd))
+                if (pte_alloc(pmc->new->vm_mm, new_pmd))
                         break;
                 if (move_ptes(vma, old_pmd, old_addr, old_addr + extent,
-                              new_vma, new_pmd, new_addr, need_rmap_locks) < 0)
+                              pmc->new, new_pmd, new_addr, pmc->need_rmap_locks) < 0)
                         goto again;
         }
 
@@ -726,10 +803,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
          * Prevent negative return values when {old,new}_addr was realigned
          * but we broke out of the above loop for the first PMD itself.
          */
-        if (old_addr < old_end - len)
+        if (old_addr < old_end - pmc->len_in)
                 return 0;
 
-        return len + old_addr - old_end;        /* how much done */
+        return pmc->len_in + old_addr - old_end;        /* how much done */
 }
 
 /* Set vrm->delta to the difference in VMA size specified by user. */
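A quick worked example of the "how much done" clamp above, using hypothetical numbers: suppose the caller asked to move len_in = 0x40000 bytes from old_addr = ...210000 (so old_end = ...250000) and try_realign_addr() pulled old_addr down to ...200000. If the loop then breaks out on the very first PMD, old_addr is still below old_end - len_in (...210000), and len_in + old_addr - old_end would come out negative (-0x10000 here); the preceding check returns 0 instead, reporting that nothing was moved.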
@@ -1040,37 +1117,40 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm,
         unsigned long internal_pgoff = internal_offset >> PAGE_SHIFT;
         unsigned long new_pgoff = vrm->vma->vm_pgoff + internal_pgoff;
         unsigned long moved_len;
-        bool need_rmap_locks;
-        struct vm_area_struct *vma;
+        struct vm_area_struct *vma = vrm->vma;
         struct vm_area_struct *new_vma;
         int err = 0;
+        PAGETABLE_MOVE(pmc, NULL, NULL, vrm->addr, vrm->new_addr, vrm->old_len);
 
-        new_vma = copy_vma(&vrm->vma, vrm->new_addr, vrm->new_len, new_pgoff,
-                           &need_rmap_locks);
+        new_vma = copy_vma(&vma, vrm->new_addr, vrm->new_len, new_pgoff,
+                           &pmc.need_rmap_locks);
         if (!new_vma) {
                 vrm_uncharge(vrm);
                 *new_vma_ptr = NULL;
                 return -ENOMEM;
         }
-        vma = vrm->vma;
+        vrm->vma = vma;
+        pmc.old = vma;
+        pmc.new = new_vma;
 
-        moved_len = move_page_tables(vma, vrm->addr, new_vma,
-                                     vrm->new_addr, vrm->old_len,
-                                     need_rmap_locks, /* for_stack= */ false);
+        moved_len = move_page_tables(&pmc);
         if (moved_len < vrm->old_len)
                 err = -ENOMEM;
         else if (vma->vm_ops && vma->vm_ops->mremap)
                 err = vma->vm_ops->mremap(new_vma);
 
         if (unlikely(err)) {
+                PAGETABLE_MOVE(pmc_revert, new_vma, vma, vrm->new_addr,
+                               vrm->addr, moved_len);
+
                 /*
                  * On error, move entries back from new area to old,
                  * which will succeed since page tables still there,
                  * and then proceed to unmap new area instead of old.
                  */
-                move_page_tables(new_vma, vrm->new_addr, vma, vrm->addr,
-                                 moved_len, /* need_rmap_locks = */ true,
-                                 /* for_stack= */ false);
+                pmc_revert.need_rmap_locks = true;
+                move_page_tables(&pmc_revert);
+
                 vrm->vma = new_vma;
                 vrm->old_len = vrm->new_len;
                 vrm->addr = vrm->new_addr;
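For context, here is a sketch of the pagetable_move_control state object and the PAGETABLE_MOVE() initialiser that the new call sites above rely on. Their authoritative definitions live elsewhere in the series (not in the hunks shown), so the field order, the derivation of old_end, and the comments below are inferred from how the diff uses them rather than quoted verbatim.

/*
 * Sketch, assuming kernel headers: the control structure threaded through
 * the move path in place of the old long parameter lists.
 */
struct pagetable_move_control {
        struct vm_area_struct *old;     /* Source VMA. */
        struct vm_area_struct *new;     /* Destination VMA. */
        unsigned long old_addr;         /* Address from which the move begins. */
        unsigned long old_end;          /* Exclusive end of the source range. */
        unsigned long new_addr;         /* Address to move page tables to. */
        unsigned long len_in;           /* Bytes to remap, as specified by the caller. */

        bool need_rmap_locks;           /* Must rmap locks be held while moving? */
        bool for_stack;                 /* Is this an early temporary stack move? */
};

/*
 * Declares and initialises a pagetable_move_control on the stack, matching
 * the PAGETABLE_MOVE(pmc, ...) and PAGETABLE_MOVE(pmc_revert, ...) call
 * sites above; old_end is derived from old_addr + len, while the two bools
 * default to false and are set explicitly where needed.
 */
#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_)   \
        struct pagetable_move_control name = {                          \
                .old = (old_),                                          \
                .new = (new_),                                          \
                .old_addr = (old_addr_),                                \
                .old_end = (old_addr_) + (len_),                        \
                .new_addr = (new_addr_),                                \
                .len_in = (len_),                                       \
        }

This matches the behaviour visible in the diff: copy_vma_and_data() builds pmc before the VMAs are known (passing NULL, NULL and filling pmc.old/pmc.new afterwards), move_page_tables() reads old_end from the structure instead of recomputing old_addr + len, and the error path sets pmc_revert.need_rmap_locks = true before reversing the move.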