@@ -196,21 +196,21 @@ class ThrowableErrorHandler : public ErrorHandler
196
196
197
197
// helper functions
198
198
199
- a64::Vec select_register (a64::Vec const &reg, uint32_t regsize)
199
+ inline a64::Vec select_register (a64::Vec const &reg, uint32_t regsize)
200
200
{
201
201
if (regsize == 4 )
202
202
return reg.s ();
203
203
return reg.d ();
204
204
}
205
205
206
- a64::Gp select_register (a64::Gp const &reg, uint32_t regsize)
206
+ inline a64::Gp select_register (a64::Gp const &reg, uint32_t regsize)
207
207
{
208
208
if (regsize == 4 )
209
209
return reg.w ();
210
210
return reg.x ();
211
211
}
212
212
213
- bool is_valid_immediate_mask (uint64_t val, size_t bytes)
213
+ inline bool is_valid_immediate_mask (uint64_t val, size_t bytes)
214
214
{
215
215
// all zeros and all ones aren't allowed, and disallow any value with bits outside of the max bit range
216
216
if (val == 0 || val == make_bitmask<uint64_t >(bytes * 8 ))
@@ -231,18 +231,28 @@ bool is_valid_immediate_mask(uint64_t val, size_t bytes)
231
231
return population_count_64 (val) == head - tail;
232
232
}
233
233
234
- bool is_valid_immediate (uint64_t val, size_t bits)
234
+ inline bool is_valid_immediate (uint64_t val, size_t bits)
235
235
{
236
236
assert (bits < 64 );
237
237
return val < (uint64_t (1 ) << bits);
238
238
}
239
239
240
- bool is_valid_immediate_signed (int64_t val, size_t bits)
240
+ inline constexpr bool is_valid_immediate_signed (int64_t val, size_t bits)
241
241
{
242
242
return util::sext (val, bits) == val;
243
243
}
244
244
245
- bool emit_add_optimized (a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val)
245
+ inline constexpr bool is_valid_offset (int64_t diff, int max_shift)
246
+ {
247
+ if (is_valid_immediate_signed (diff, 9 ))
248
+ return true ; // 9-bit signed offset
249
+ else if ((diff >= 0 ) && (diff < (1 << (12 + max_shift))) && !(diff & make_bitmask<int64_t >(max_shift)))
250
+ return true ; // 12-bit unsigned offset shifted by operand size
251
+ else
252
+ return false ;
253
+ }
254
+
255
+ inline bool emit_add_optimized (a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val)
246
256
{
247
257
// If the bottom 12 bits are 0s then an optimized form can be used if the remaining bits are <= 12
248
258
if (is_valid_immediate (val, 12 ) || ((val & 0xfff ) == 0 && is_valid_immediate (val >> 12 , 12 )))
@@ -254,7 +264,7 @@ bool emit_add_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &sr
254
264
return false ;
255
265
}
256
266
257
- bool emit_sub_optimized (a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val)
267
+ inline bool emit_sub_optimized (a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val)
258
268
{
259
269
if (val < 0 )
260
270
val = -val;
@@ -509,11 +519,11 @@ void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp &reg, const
509
519
a.mov (reg, val);
510
520
}
511
521
512
- void drcbe_arm64::emit_ldr_str_base_mem (a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg &reg, const void *ptr) const
522
+ inline void drcbe_arm64::emit_ldr_str_base_mem (a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg &reg, int max_shift , const void *ptr) const
513
523
{
514
- // If it can fit as a constant offset
524
+ // If it can fit as an immediate offset
515
525
const int64_t diff = (int64_t )ptr - (int64_t )m_baseptr;
516
- if (is_valid_immediate_signed (diff, 9 ))
526
+ if (is_valid_offset (diff, max_shift ))
517
527
{
518
528
a.emit (opcode, reg, arm::Mem (BASE_REG, diff));
519
529
return ;
@@ -547,31 +557,30 @@ void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode,
547
557
return ;
548
558
}
549
559
550
- if (diff >= 0 )
560
+ // If it's in a nearby page
561
+ const uint64_t pagebase = codeoffs & ~make_bitmask<uint64_t >(12 );
562
+ const int64_t pagerel = (int64_t )ptr - pagebase;
563
+ if (is_valid_immediate_signed (pagerel, 21 + 12 ))
551
564
{
552
- int shift = 0 ;
553
- int max_shift = 0 ;
565
+ const uint64_t targetpage = ( uint64_t )ptr & ~make_bitmask< uint64_t >( 12 ) ;
566
+ const uint64_t pageoffs = ( uint64_t )ptr & util::make_bitmask< uint64_t >( 12 ) ;
554
567
555
- if (opcode == a64::Inst::kIdLdrb || opcode == a64::Inst::kIdLdrsb )
556
- max_shift = 0 ;
557
- else if (opcode == a64::Inst::kIdLdrh || opcode == a64::Inst::kIdLdrsh )
558
- max_shift = 1 ;
559
- else if (opcode == a64::Inst::kIdLdrsw )
560
- max_shift = 2 ;
568
+ a.adrp (MEM_SCRATCH_REG, targetpage);
569
+ if (is_valid_offset (pageoffs, max_shift))
570
+ {
571
+ a.emit (opcode, reg, arm::Mem (MEM_SCRATCH_REG, pageoffs));
572
+ }
561
573
else
562
- max_shift = (reg.isGpW () || reg.isVecS ()) ? 2 : 3 ;
563
-
564
- for (int i = 0 ; i < 64 && max_shift > 0 ; i++)
565
574
{
566
- if ((uint64_t )ptr & ((uint64_t )(1 ) << i))
567
- {
568
- shift = i;
569
- break ;
570
- }
575
+ a.add (MEM_SCRATCH_REG, MEM_SCRATCH_REG, pageoffs);
576
+ a.emit (opcode, reg, arm::Mem (MEM_SCRATCH_REG));
571
577
}
578
+ return ;
579
+ }
572
580
573
- if (shift > max_shift)
574
- shift = max_shift;
581
+ if (diff >= 0 )
582
+ {
583
+ const int shift = (diff & make_bitmask<int64_t >(max_shift)) ? 0 : max_shift;
575
584
576
585
if (is_valid_immediate (diff >> shift, 32 ))
577
586
{
@@ -586,44 +595,23 @@ void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode,
586
595
}
587
596
}
588
597
589
- const uint64_t pagebase = codeoffs & ~make_bitmask<uint64_t >(12 );
590
- const int64_t pagerel = (int64_t )ptr - pagebase;
591
- if (is_valid_immediate_signed (pagerel, 21 + 12 ))
592
- {
593
- const uint64_t targetpage = (uint64_t )ptr & ~make_bitmask<uint64_t >(12 );
594
- const uint64_t pageoffs = (uint64_t )ptr & util::make_bitmask<uint64_t >(12 );
595
-
596
- a.adrp (MEM_SCRATCH_REG, targetpage);
597
-
598
- if (is_valid_immediate_signed (pageoffs, 9 ))
599
- {
600
- a.emit (opcode, reg, arm::Mem (MEM_SCRATCH_REG, pageoffs));
601
- return ;
602
- }
603
- else if (emit_add_optimized (a, MEM_SCRATCH_REG, MEM_SCRATCH_REG, pageoffs))
604
- {
605
- a.emit (opcode, reg, arm::Mem (MEM_SCRATCH_REG));
606
- return ;
607
- }
608
- }
609
-
610
598
// Can't optimize it at all, most likely becomes 4 MOV commands
611
599
a.mov (MEM_SCRATCH_REG, ptr);
612
600
a.emit (opcode, reg, arm::Mem (MEM_SCRATCH_REG));
613
601
}
614
602
615
- void drcbe_arm64::emit_ldr_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdr , reg, ptr); }
616
- void drcbe_arm64::emit_ldrb_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrb , reg, ptr); }
617
- void drcbe_arm64::emit_ldrh_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrh , reg, ptr); }
618
- void drcbe_arm64::emit_ldrsb_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrsb , reg, ptr); }
619
- void drcbe_arm64::emit_ldrsh_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrsh , reg, ptr); }
620
- void drcbe_arm64::emit_ldrsw_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrsw , reg, ptr); }
621
- void drcbe_arm64::emit_str_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdStr , reg, ptr); }
622
- void drcbe_arm64::emit_strb_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdStrb , reg, ptr); }
623
- void drcbe_arm64::emit_strh_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdStrh , reg, ptr); }
603
+ void drcbe_arm64::emit_ldr_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdr , reg, reg. isGpW () ? 2 : 3 , ptr); }
604
+ void drcbe_arm64::emit_ldrb_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrb , reg, 0 , ptr); }
605
+ void drcbe_arm64::emit_ldrh_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrh , reg, 1 , ptr); }
606
+ void drcbe_arm64::emit_ldrsb_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrsb , reg, 0 , ptr); }
607
+ void drcbe_arm64::emit_ldrsh_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrsh , reg, 1 , ptr); }
608
+ void drcbe_arm64::emit_ldrsw_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdrsw , reg, 2 , ptr); }
609
+ void drcbe_arm64::emit_str_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdStr , reg, reg. isGpW () ? 2 : 3 , ptr); }
610
+ void drcbe_arm64::emit_strb_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdStrb , reg, 0 , ptr); }
611
+ void drcbe_arm64::emit_strh_mem (a64::Assembler &a, const a64::Gp &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdStrh , reg, 1 , ptr); }
624
612
625
- void drcbe_arm64::emit_float_ldr_mem (a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdr_v , reg, ptr); }
626
- void drcbe_arm64::emit_float_str_mem (a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdStr_v , reg, ptr); }
613
+ void drcbe_arm64::emit_float_ldr_mem (a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdLdr_v , reg, reg. isVecS () ? 2 : 3 , ptr); }
614
+ void drcbe_arm64::emit_float_str_mem (a64::Assembler &a, const a64::Vec &reg, const void *ptr) const { emit_ldr_str_base_mem (a, a64::Inst::kIdStr_v , reg, reg. isVecS () ? 2 : 3 , ptr); }
627
615
628
616
void drcbe_arm64::mov_reg_param (a64::Assembler &a, uint32_t regsize, const a64::Gp &dst, const be_parameter &src) const
629
617
{
@@ -803,7 +791,7 @@ void drcbe_arm64::call_arm_addr(a64::Assembler &a, const void *offs) const
803
791
{
804
792
const uint64_t codeoffs = a.code ()->baseAddress () + a.offset ();
805
793
const int64_t reloffs = (int64_t )offs - codeoffs;
806
- if (is_valid_immediate_signed (reloffs, 26 ))
794
+ if (is_valid_immediate_signed (reloffs, 26 + 2 ))
807
795
{
808
796
a.bl (offs);
809
797
}
0 commit comments