Skip to content

Commit 6c34468

Browse files
committed
ARM port: replace Psavelr pseudo-instruction by actual instructions
Saving the return address register (R14) in the function prologue can be done either with a single "str" instruction if the offset is small enough, or by constructing the address using the R12 register as a temporary then storing with "str" relative to R12. R12 can be used as a temporary because it is marked as destroyed at function entry. We just need to tell Asmgen.translcode whether R12 still contains the back link left there by Pallocframe, or R12 was trashed. In the latter case R12 will be reloaded from the stack at the next Mgetparam instruction.
1 parent 82c4547 commit 6c34468

File tree

6 files changed

+88
-47
lines changed

6 files changed

+88
-47
lines changed

arm/Asm.v

-15
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,6 @@ Inductive instruction : Type :=
201201
(* Pseudo-instructions *)
202202
| Pallocframe: Z -> ptrofs -> instruction (**r allocate new stack frame *)
203203
| Pfreeframe: Z -> ptrofs -> instruction (**r deallocate stack frame and restore previous frame *)
204-
| Psavelr: ptrofs -> instruction (**r store link register in allocated stack frame *)
205204
| Plabel: label -> instruction (**r define a code label *)
206205
| Ploadsymbol: ireg -> ident -> ptrofs -> instruction (**r load the address of a symbol *)
207206
| Pmovite: testcond -> ireg -> shift_op -> shift_op -> instruction (**r integer conditional move *)
@@ -270,18 +269,6 @@ lbl: .word symbol
270269
>>
271270
Again, our memory model cannot comprehend that this operation
272271
frees (logically) the current stack frame.
273-
- [Psavelr pos]: this pseudo-instruction stores the link register in the
274-
stackframe at the specified position. It handles out-of-range offsets,
275-
i.e. the generated code for in-range offsets is:
276-
<<
277-
str lr, [sp, #pos]
278-
>>
279-
and for out-of-range offsets:
280-
<<
281-
add sp, sp, #pos (* code to expand the immediate *)
282-
str lr, [sp]
283-
sub sp, sp, #pos (* code to expand the immediate *)
284-
>>
285272
- [Pbtbl reg table]: this is a N-way branch, implemented via a jump table
286273
as follows:
287274
<<
@@ -750,8 +737,6 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
750737
| _ => Stuck
751738
end
752739
end
753-
| Psavelr pos =>
754-
exec_store Mint32 (Val.offset_ptr rs#IR13 pos) IR14 rs m
755740
| Plabel lbl =>
756741
Next (nextinstr rs) m
757742
| Ploadsymbol r1 lbl ofs =>

arm/Asmexpand.ml

-7
Original file line numberDiff line numberDiff line change
@@ -445,13 +445,6 @@ let expand_instruction instr =
445445
end else
446446
emit (Pldr (IR13,IR13,SOimm ofs));
447447
end
448-
| Psavelr ofs ->
449-
if camlint_of_coqint ofs >= 4096l then begin
450-
expand_addimm IR13 IR13 ofs;
451-
emit (Pstr (IR14,IR13,SOimm _0));
452-
expand_subimm IR13 IR13 ofs
453-
end else
454-
emit (Pstr (IR14,IR13,SOimm ofs))
455448
| Pbuiltin (ef,args,res) ->
456449
begin match ef with
457450
| EF_builtin (name,sg) ->

arm/Asmgen.v

+20-3
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,22 @@ Definition storeind (src: mreg) (base: ireg) (ofs: ptrofs) (ty: typ) (k: code) :
602602
Error (msg "Asmgen.storeind")
603603
end.
604604

605+
(** This is a variant of [storeind] that is used to save the return address
606+
at the beginning of a function. It uses [R12] instead of [R14] as
607+
temporary register. *)
608+
609+
Definition save_lr (ofs: ptrofs) (k: code) :=
610+
let n := Ptrofs.to_int ofs in
611+
let n1 := mk_immed_mem_word n in
612+
if Int.eq n n1
613+
then Pstr IR14 IR13 (SOimm n) :: k
614+
else addimm IR12 IR13 (Int.sub n n1) (Pstr IR14 IR12 (SOimm n1) :: k).
615+
616+
Definition save_lr_preserves_R12 (ofs: ptrofs) : bool :=
617+
let n := Ptrofs.to_int ofs in
618+
let n1 := mk_immed_mem_word n in
619+
Int.eq n n1.
620+
605621
(** Translation of memory accesses *)
606622

607623
Definition transl_memory_access
@@ -787,11 +803,12 @@ Definition transl_code' (f: Mach.function) (il: list Mach.instruction) (it1p: bo
787803
around, leading to incorrect executions. *)
788804

789805
Definition transl_function (f: Mach.function) :=
790-
do c <- transl_code f f.(Mach.fn_code) true;
806+
do c <- transl_code f f.(Mach.fn_code)
807+
(save_lr_preserves_R12 f.(fn_retaddr_ofs));
791808
OK (mkfunction f.(Mach.fn_sig)
792809
(Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
793-
Psavelr f.(fn_retaddr_ofs) ::
794-
Pcfi_rel_offset (Ptrofs.to_int f.(fn_retaddr_ofs)):: c)).
810+
save_lr f.(fn_retaddr_ofs)
811+
(Pcfi_rel_offset (Ptrofs.to_int f.(fn_retaddr_ofs)):: c))).
795812

796813
Definition transf_function (f: Mach.function) : res Asm.function :=
797814
do tf <- transl_function f;

arm/Asmgenproof.v

+37-20
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,15 @@ Proof.
241241
destruct ty, (preg_of src); inv H; TailNoLabel.
242242
Qed.
243243

244+
Remark save_lr_label:
245+
forall ofs k, tail_nolabel k (save_lr ofs k).
246+
Proof.
247+
unfold save_lr; intros.
248+
destruct (Int.eq (Ptrofs.to_int ofs) (mk_immed_mem_word (Ptrofs.to_int ofs))).
249+
TailNoLabel.
250+
eapply tail_nolabel_trans; TailNoLabel.
251+
Qed.
252+
244253
Remark transl_cond_label:
245254
forall cond args k c, transl_cond cond args k = OK c -> tail_nolabel k c.
246255
Proof.
@@ -338,7 +347,7 @@ Lemma transl_find_label:
338347
end.
339348
Proof.
340349
intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x))); inv EQ0.
341-
monadInv EQ. simpl.
350+
monadInv EQ. simpl. erewrite tail_nolabel_find_label by (apply save_lr_label). simpl.
342351
eapply transl_code_label; eauto.
343352
Qed.
344353

@@ -382,7 +391,8 @@ Proof.
382391
destruct i; try (intros [A B]; apply A). intros. subst c0. repeat constructor.
383392
- intros. monadInv H0.
384393
destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x))); inv EQ0. monadInv EQ.
385-
exists x; exists true; split; auto. repeat constructor.
394+
exists x; exists (save_lr_preserves_R12 (fn_retaddr_ofs f0)); split; auto.
395+
constructor. eapply is_tail_trans. 2: apply tail_nolabel_is_tail; apply save_lr_label. repeat constructor.
386396
- exact transf_function_no_overflow.
387397
Qed.
388398

@@ -854,9 +864,10 @@ Opaque loadind.
854864
generalize EQ; intros EQ'. monadInv EQ'.
855865
destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x0))); inversion EQ1. clear EQ1. subst x0.
856866
monadInv EQ0.
867+
set (ra_ofs := fn_retaddr_ofs f) in *.
868+
set (ra_ofs' := Ptrofs.to_int ra_ofs) in *.
857869
set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) ::
858-
Psavelr (fn_retaddr_ofs f) ::
859-
Pcfi_rel_offset (Ptrofs.to_int (fn_retaddr_ofs f)) :: x0) in *.
870+
save_lr ra_ofs (Pcfi_rel_offset ra_ofs' :: x0)) in *.
860871
set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *.
861872
unfold store_stack in *.
862873
exploit Mem.alloc_extends. eauto. eauto. apply Zle_refl. apply Zle_refl.
@@ -867,34 +878,40 @@ Opaque loadind.
867878
intros [m3' [P Q]].
868879
(* Execution of function prologue *)
869880
set (rs2 := nextinstr (rs0#IR12 <- (parent_sp s) #IR13 <- (Vptr stk Ptrofs.zero))).
870-
set (rs3 := nextinstr rs2).
881+
edestruct (save_lr_correct tge tf ra_ofs (Pcfi_rel_offset ra_ofs' :: x0) rs2) as (rs3 & X & Y & Z).
882+
change (rs2 IR13) with sp. change (rs2 IR14) with (rs0 IR14). rewrite ATLR. eexact P.
871883
set (rs4 := nextinstr rs3).
872884
assert (EXEC_PROLOGUE:
873885
exec_straight tge tf
874886
(fn_code tf) rs0 m'
875887
x0 rs4 m3').
888+
{
876889
change (fn_code tf) with tfbody; unfold tfbody.
877-
apply exec_straight_three with rs2 m2' rs3 m3'.
890+
eapply exec_straight_trans with (rs2 := rs2) (m2 := m2').
891+
apply exec_straight_one.
878892
unfold exec_instr. rewrite C. fold sp.
879893
rewrite <- (sp_val _ _ _ AG). unfold Tptr, chunk_of_type, Archi.ptr64 in F. rewrite F. auto.
880-
simpl. auto.
881-
simpl. unfold exec_store. change (rs2 IR14) with (rs0 IR14).
882-
rewrite Ptrofs.add_zero_l. simpl. unfold Tptr, chunk_of_type, Archi.ptr64 in P. simpl in P.
883-
rewrite Ptrofs.add_zero_l in P. rewrite ATLR.
884-
rewrite P. auto. auto. auto. auto. auto.
894+
auto.
895+
eapply exec_straight_trans with (rs2 := rs3) (m2 := m3').
896+
eexact X.
897+
apply exec_straight_one.
898+
simpl; reflexivity. reflexivity.
899+
}
900+
(* After the function prologue is the code for the function body *)
901+
exploit exec_straight_steps_2; eauto using functions_transl. omega. constructor.
902+
intros (ofsbody & U & V).
903+
(* Conclusions *)
885904
left; exists (State rs4 m3'); split.
886905
eapply exec_straight_steps_1; eauto. omega. constructor.
887-
econstructor; eauto.
888-
change (rs4 PC) with (Val.offset_ptr (Val.offset_ptr (Val.offset_ptr (rs0 PC) Ptrofs.one) Ptrofs.one) Ptrofs.one).
889-
rewrite ATPC. simpl. constructor; eauto.
890-
eapply code_tail_next_int. omega.
891-
eapply code_tail_next_int. omega.
892-
eapply code_tail_next_int. omega. constructor.
893-
unfold rs4, rs2.
894-
apply agree_nextinstr. apply agree_nextinstr. apply agree_nextinstr.
906+
econstructor; eauto. rewrite U. econstructor; eauto.
907+
apply agree_nextinstr.
908+
apply agree_undef_regs2 with rs2.
909+
apply agree_nextinstr.
895910
eapply agree_change_sp.
896911
apply agree_undef_regs with rs0; eauto.
897-
intros. Simpl. congruence.
912+
intros; Simpl.
913+
congruence.
914+
intros; apply Y; eauto with asmgen.
898915

899916
- (* external function *)
900917
exploit functions_translated; eauto.

arm/Asmgenproof1.v

+31
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,37 @@ Proof.
616616
intros; Simpl.
617617
Qed.
618618

619+
(** Saving the link register *)
620+
621+
Lemma save_lr_correct:
622+
forall ofs k (rs: regset) m m',
623+
Mem.storev Mint32 m (Val.offset_ptr rs#IR13 ofs) (rs#IR14) = Some m' ->
624+
exists rs',
625+
exec_straight ge fn (save_lr ofs k) rs m k rs' m'
626+
/\ (forall r, if_preg r = true -> r <> IR12 -> rs'#r = rs#r)
627+
/\ (save_lr_preserves_R12 ofs = true -> rs'#IR12 = rs#IR12).
628+
Proof.
629+
intros; unfold save_lr, save_lr_preserves_R12.
630+
set (n := Ptrofs.to_int ofs). set (n1 := mk_immed_mem_word n).
631+
assert (EQ: Val.offset_ptr rs#IR13 ofs = Val.add rs#IR13 (Vint n)).
632+
{ destruct rs#IR13; try discriminate. simpl. f_equal; f_equal. unfold n; symmetry; auto with ptrofs. }
633+
destruct (Int.eq n n1).
634+
- econstructor; split. apply exec_straight_one. simpl; unfold exec_store. rewrite <- EQ, H; reflexivity. auto.
635+
split. intros; Simpl. intros; Simpl.
636+
- destruct (addimm_correct IR12 IR13 (Int.sub n n1) (Pstr IR14 IR12 (SOimm n1) :: k) rs m)
637+
as (rs1 & A & B & C).
638+
econstructor; split.
639+
eapply exec_straight_trans. eexact A.
640+
apply exec_straight_one. simpl; unfold exec_store.
641+
rewrite B. rewrite Val.add_assoc. simpl.
642+
rewrite Int.sub_add_opp. rewrite Int.add_assoc.
643+
rewrite (Int.add_commut (Int.neg n1)).
644+
rewrite Int.add_neg_zero. rewrite Int.add_zero.
645+
rewrite <- EQ. rewrite C by eauto with asmgen. rewrite H. reflexivity.
646+
auto.
647+
split. intros; Simpl. congruence.
648+
Qed.
649+
619650
(** Translation of shift immediates *)
620651

621652
Lemma transl_shift_correct:

arm/TargetPrinter.ml

-2
Original file line numberDiff line numberDiff line change
@@ -693,8 +693,6 @@ struct
693693
assert false
694694
| Pfreeframe(sz, ofs) ->
695695
assert false
696-
| Psavelr(ofs) ->
697-
assert false
698696
| Plabel lbl ->
699697
fprintf oc "%a:\n" print_label lbl; 0
700698
| Ploadsymbol(r1, id, ofs) ->

0 commit comments

Comments
 (0)