Skip to content

Switch lookup tables end up in .data instead of .text by default #69

Open
@gergoerdi

Description

@gergoerdi

I've been struggling for a couple days now on why my CHIP-8 firmware doesn't work anymore, even when downgrading to the exact same rustc/llvm versions as before. Then I found out that it's not because of anything in the compiler, but it's because in the meantime I switched over to using Xargo for linking instead of a custom shell script.

The following program should demonstrate the problem. It is the result of compiling a switch, not unlike the one in #47. The stripped-down IR is as follows:

target datalayout = "e-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
target triple = "avr-atmel-none"

@switch.table = private unnamed_addr constant [4 x i8] c"\01\02\03\0C"
@switch.table.1 = private unnamed_addr constant [4 x i8] c"\04\05\06\0D"
@switch.table.2 = private unnamed_addr constant [4 x i8] c"\07\08\09\0E"
@switch.table.3 = private unnamed_addr constant [4 x i8] c"\0A\00\0B\0F"

; Function Attrs: noinline norecurse nounwind uwtable
define internal fastcc void @spi_setup() {
start:
  %0 = load volatile i8, i8* inttoptr (i16 37 to i8*), align 1
  %1 = or i8 %0, 4
  store volatile i8 %1, i8* inttoptr (i16 37 to i8*), align 1
  %2 = load volatile i8, i8* inttoptr (i16 36 to i8*), align 4
  %3 = or i8 %2, 44
  store volatile i8 %3, i8* inttoptr (i16 36 to i8*), align 4
  %4 = load volatile i8, i8* inttoptr (i16 36 to i8*), align 4
  %5 = and i8 %4, -17
  store volatile i8 %5, i8* inttoptr (i16 36 to i8*), align 4
  store volatile i8 80, i8* inttoptr (i16 76 to i8*), align 4
  ret void
}

; Function Attrs: noinline norecurse nounwind uwtable
define internal fastcc void @spi_sync(i8) unnamed_addr #1 {
start:
  store volatile i8 %0, i8* inttoptr (i16 78 to i8*), align 2
  br label %bb2

bb2:                                              ; preds = %bb2, %start
  %1 = load volatile i8, i8* inttoptr (i16 77 to i8*), align 1
  %2 = icmp sgt i8 %1, -1
  br i1 %2, label %bb2, label %bb3

bb3:                                              ; preds = %bb2
  %3 = load volatile i8, i8* inttoptr (i16 78 to i8*), align 2
  ret void
}

; Function Attrs: noinline uwtable
define internal fastcc i8 @coords_key(i16) unnamed_addr #2 {
start:
  %abi_cast.sroa.0.0.extract.trunc = trunc i16 %0 to i8
  %abi_cast.sroa.4.0.extract.shift = lshr i16 %0, 8
  %abi_cast.sroa.4.0.extract.trunc = trunc i16 %abi_cast.sroa.4.0.extract.shift to i8
  switch i8 %abi_cast.sroa.0.0.extract.trunc, label %bb17 [
    i8 0, label %bb18
    i8 1, label %bb19
    i8 2, label %bb20
    i8 3, label %bb21
  ]

bb17:                                             ; preds = %bb21, %bb20, %bb19,
  unreachable

bb18:                                             ; preds = %start
  %1 = icmp ult i8 %abi_cast.sroa.4.0.extract.trunc, 4
  br i1 %1, label %switch.lookup, label %bb17

bb19:                                             ; preds = %start
  %2 = icmp ult i8 %abi_cast.sroa.4.0.extract.trunc, 4
  br i1 %2, label %switch.lookup1, label %bb17

bb20:                                             ; preds = %start
  %3 = icmp ult i8 %abi_cast.sroa.4.0.extract.trunc, 4
  br i1 %3, label %switch.lookup5, label %bb17

bb21:                                             ; preds = %start
  %4 = icmp ult i8 %abi_cast.sroa.4.0.extract.trunc, 4
  br i1 %4, label %switch.lookup9, label %bb17

switch.lookup:                                    ; preds = %bb18
  %5 = sext i8 %abi_cast.sroa.4.0.extract.trunc to i16
  %switch.gep = getelementptr inbounds [4 x i8], [4 x i8]* @switch.table, i16 0, i16 %5
  %switch.load = load i8, i8* %switch.gep, align 1
  ret i8 %switch.load

switch.lookup1:                                   ; preds = %bb19
  %6 = sext i8 %abi_cast.sroa.4.0.extract.trunc to i16
  %switch.gep3 = getelementptr inbounds [4 x i8], [4 x i8]* @switch.table.1, i16 0, i16 %6
  %switch.load4 = load i8, i8* %switch.gep3, align 1
  ret i8 %switch.load4

switch.lookup5:                                   ; preds = %bb20
  %7 = sext i8 %abi_cast.sroa.4.0.extract.trunc to i16
  %switch.gep7 = getelementptr inbounds [4 x i8], [4 x i8]* @switch.table.2, i16 0, i16 %7
  %switch.load8 = load i8, i8* %switch.gep7, align 1
  ret i8 %switch.load8

switch.lookup9:                                   ; preds = %bb21
  %8 = sext i8 %abi_cast.sroa.4.0.extract.trunc to i16
  %switch.gep11 = getelementptr inbounds [4 x i8], [4 x i8]* @switch.table.3, i16 0, i16 %8
  %switch.load12 = load i8, i8* %switch.gep11, align 1
  ret i8 %switch.load12
}

; Function Attrs: noinline nounwind uwtable
define internal fastcc i16 @wait_key() {
  ret i16 257 ; 0x0101
  
}

; Function Attrs: noreturn nounwind uwtable
define void @main() unnamed_addr #4 {
start:
  tail call fastcc void @spi_setup() #7

  %0 = tail call fastcc i16 @wait_key() #7
  %1 = tail call fastcc i8 @coords_key(i16 %0) #7
  tail call fastcc void @spi_sync(i8 %1) #7
  ret void
}

I can compile this on f1e3b63, i.e. with all the horrible kludges that force lpm to be used for loading globals, and the resulting assembly of coords_keys looks fine:

0000001c <coords_key>:
  1c:   81 30           cpi     r24, 0x01       ; 1
  1e:   01 f0           breq    .+0             ; 0x20 <coords_key+0x4>
                        1e: R_AVR_7_PCREL       .text+0x54
  20:   82 30           cpi     r24, 0x02       ; 2
  22:   01 f0           breq    .+0             ; 0x24 <coords_key+0x8>
                        22: R_AVR_7_PCREL       .text+0x40
  24:   83 30           cpi     r24, 0x03       ; 3
  26:   01 f0           breq    .+0             ; 0x28 <coords_key+0xc>
                        26: R_AVR_7_PCREL       .text+0x2a
  28:   00 c0           rjmp    .+0             ; 0x2a <LBB2_3>
                        28: R_AVR_13_PCREL      .text+0x68

0000002a <LBB2_3>:
  2a:   94 30           cpi     r25, 0x04       ; 4
  2c:   00 f0           brcs    .+0             ; 0x2e <LBB2_3+0x4>
                        2c: R_AVR_7_PCREL       .text+0x30
  2e:   00 c0           rjmp    .+0             ; 0x30 <LBB2_4>
                        2e: R_AVR_13_PCREL      .text+0x7c

00000030 <LBB2_4>:
  30:   e9 2f           mov     r30, r25
  32:   f9 2f           mov     r31, r25
  34:   ff 0f           add     r31, r31
  36:   ff 0b           sbc     r31, r31
  38:   e0 50           subi    r30, 0x00       ; 0
                        38: R_AVR_LO8_LDI_NEG   .rodata.cst4+0xc
  3a:   f0 40           sbci    r31, 0x00       ; 0
                        3a: R_AVR_HI8_LDI_NEG   .rodata.cst4+0xc
  3c:   84 91           lpm     r24, Z
  3e:   08 95           ret

00000040 <LBB2_5>:
  40:   94 30           cpi     r25, 0x04       ; 4
  42:   00 f4           brcc    .+0             ; 0x44 <LBB2_5+0x4>
                        42: R_AVR_7_PCREL       .text+0x7c
  44:   e9 2f           mov     r30, r25
  46:   f9 2f           mov     r31, r25
  48:   ff 0f           add     r31, r31
  4a:   ff 0b           sbc     r31, r31
  4c:   e0 50           subi    r30, 0x00       ; 0
                        4c: R_AVR_LO8_LDI_NEG   .rodata.cst4+0x8
  4e:   f0 40           sbci    r31, 0x00       ; 0
                        4e: R_AVR_HI8_LDI_NEG   .rodata.cst4+0x8
  50:   84 91           lpm     r24, Z
  52:   08 95           ret

00000054 <LBB2_7>:
  54:   94 30           cpi     r25, 0x04       ; 4
  56:   00 f4           brcc    .+0             ; 0x58 <LBB2_7+0x4>
                        56: R_AVR_7_PCREL       .text+0x7c
  58:   e9 2f           mov     r30, r25
  5a:   f9 2f           mov     r31, r25
  5c:   ff 0f           add     r31, r31
  5e:   ff 0b           sbc     r31, r31
  60:   e0 50           subi    r30, 0x00       ; 0
                        60: R_AVR_LO8_LDI_NEG   .rodata.cst4+0x4
  62:   f0 40           sbci    r31, 0x00       ; 0
                        62: R_AVR_HI8_LDI_NEG   .rodata.cst4+0x4
  64:   84 91           lpm     r24, Z
  66:   08 95           ret

00000068 <LBB2_9>:
  68:   94 30           cpi     r25, 0x04       ; 4
  6a:   00 f4           brcc    .+0             ; 0x6c <LBB2_9+0x4>
                        6a: R_AVR_7_PCREL       .text+0x7c
  6c:   e9 2f           mov     r30, r25
  6e:   f9 2f           mov     r31, r25
  70:   ff 0f           add     r31, r31
  72:   ff 0b           sbc     r31, r31
  74:   e0 50           subi    r30, 0x00       ; 0
                        74: R_AVR_LO8_LDI_NEG   .rodata.cst4
  76:   f0 40           sbci    r31, 0x00       ; 0
                        76: R_AVR_HI8_LDI_NEG   .rodata.cst4
  78:   84 91           lpm     r24, Z
  7a:   08 95           ret

In fact, the above code is correct in that I can link it with a custom linker script and get a working program:

$ llc --filetype=obj a.ll
$ avr-gcc -Os -Wl,--gc-sections -T lookup-text.ld -mmcu=atmega328p -o good.elf a.o
$ avr-objdump --syms good.elf |grep switch
00000118 l     O .text	00000004 switch.table
0000011c l     O .text	00000004 switch.table.1
00000120 l     O .text	00000004 switch.table.2
00000124 l     O .text	00000004 switch.table.3

but with the default linker script, the lookup tables end up in .data:

$ avr-gcc -Os -Wl,--gc-sections -mmcu=atmega328p -o bad.elf a.o
$ avr-objdump --syms bad.elf |grep switch
00800100 l     O .data	00000004 switch.table
00800104 l     O .data	00000004 switch.table.1
00800108 l     O .data	00000004 switch.table.2
0080010c l     O .data	00000004 switch.table.3

The customized linker script lookup-text.ld is as follows:


OUTPUT_FORMAT("elf32-avr","elf32-avr","elf32-avr")
OUTPUT_ARCH(avr:2)
MEMORY
{
  text   (rx)   : ORIGIN = 0, LENGTH = 8K
  data   (rw!x) : ORIGIN = 0x800060, LENGTH = 0xffa0
  eeprom (rw!x) : ORIGIN = 0x810000, LENGTH = 64K
  fuse      (rw!x) : ORIGIN = 0x820000, LENGTH = 1K
  lock      (rw!x) : ORIGIN = 0x830000, LENGTH = 1K
  signature (rw!x) : ORIGIN = 0x840000, LENGTH = 1K
  user_signatures (rw!x) : ORIGIN = 0x850000, LENGTH = 1K
}
SECTIONS
{
  /* Read-only sections, merged into text segment: */
  .hash          : { *(.hash)		}
  .dynsym        : { *(.dynsym)		}
  .dynstr        : { *(.dynstr)		}
  .gnu.version   : { *(.gnu.version)	}
  .gnu.version_d   : { *(.gnu.version_d)	}
  .gnu.version_r   : { *(.gnu.version_r)	}
  .rel.init      : { *(.rel.init)		}
  .rela.init     : { *(.rela.init)	}
  .rel.text      :
    {
      *(.rel.text)
      *(.rel.text.*)
      *(.rel.gnu.linkonce.t*)
    }
  .rela.text     :
    {
      *(.rela.text)
      *(.rela.text.*)
      *(.rela.gnu.linkonce.t*)
    }
  .rel.fini      : { *(.rel.fini)		}
  .rela.fini     : { *(.rela.fini)	}
  .rel.rodata    :
    {
      *(.rel.rodata)
      *(.rel.rodata.*)
      *(.rel.gnu.linkonce.r*)
    }
  .rela.rodata   :
    {
      *(.rela.rodata)
      *(.rela.rodata.*)
      *(.rela.gnu.linkonce.r*)
    }
  .rel.data      :
    {
      *(.rel.data)
      *(.rel.data.*)
      *(.rel.gnu.linkonce.d*)
    }
  .rela.data     :
    {
      *(.rela.data)
      *(.rela.data.*)
      *(.rela.gnu.linkonce.d*)
    }
  .rel.ctors     : { *(.rel.ctors)	}
  .rela.ctors    : { *(.rela.ctors)	}
  .rel.dtors     : { *(.rel.dtors)	}
  .rela.dtors    : { *(.rela.dtors)	}
  .rel.got       : { *(.rel.got)		}
  .rela.got      : { *(.rela.got)		}
  .rel.bss       : { *(.rel.bss)		}
  .rela.bss      : { *(.rela.bss)		}
  .rel.plt       : { *(.rel.plt)		}
  .rela.plt      : { *(.rela.plt)		}
  /* Internal text space or external memory.  */
  .text   :
  {
    *(.vectors)
    KEEP(*(.vectors))
    /* For data that needs to reside in the lower 64k of progmem.  */
    *(.progmem.gcc*)
    *(.progmem*)
    . = ALIGN(2);
     __trampolines_start = . ;
    /* The jump trampolines for the 16-bit limited relocs will reside here.  */
    *(.trampolines)
    *(.trampolines*)
     __trampolines_end = . ;
    /* For future tablejump instruction arrays for 3 byte pc devices.
       We don't relax jump/call instructions within these sections.  */
    *(.jumptables)
    *(.jumptables*)
    /* For code that needs to reside in the lower 128k progmem.  */
    *(.lowtext)
    *(.lowtext*)
     __ctors_start = . ;
     *(.ctors)
     __ctors_end = . ;
     __dtors_start = . ;
     *(.dtors)
     __dtors_end = . ;
    KEEP(SORT(*)(.ctors))
    KEEP(SORT(*)(.dtors))
    /* From this point on, we don't bother about wether the insns are
       below or above the 16 bits boundary.  */
    *(.init0)  /* Start here after reset.  */
    KEEP (*(.init0))
    *(.init1)
    KEEP (*(.init1))
    *(.init2)  /* Clear __zero_reg__, set up stack pointer.  */
    KEEP (*(.init2))
    *(.init3)
    KEEP (*(.init3))
    *(.init4)  /* Initialize data and BSS.  */
    KEEP (*(.init4))
    *(.init5)
    KEEP (*(.init5))
    *(.init6)  /* C++ constructors.  */
    KEEP (*(.init6))
    *(.init7)
    KEEP (*(.init7))
    *(.init8)
    KEEP (*(.init8))
    *(.init9)  /* Call main().  */
    KEEP (*(.init9))
    *(.text)
    . = ALIGN(2);
    *(.text.*)
    . = ALIGN(2);
    *(.fini9)  /* _exit() starts here.  */
    KEEP (*(.fini9))
    *(.fini8)
    KEEP (*(.fini8))
    *(.fini7)
    KEEP (*(.fini7))
    *(.fini6)  /* C++ destructors.  */
    KEEP (*(.fini6))
    *(.fini5)
    KEEP (*(.fini5))
    *(.fini4)
    KEEP (*(.fini4))
    *(.fini3)
    KEEP (*(.fini3))
    *(.fini2)
    KEEP (*(.fini2))
    *(.fini1)
    KEEP (*(.fini1))
    *(.fini0)  /* Infinite loop after program termination.  */
    KEEP (*(.fini0))
     _etext = .; 
    *(.rodata)  /* We need to include .rodata here if gcc is used */
    *(.rodata*) /* with -fdata-sections.  */
  }  > text
  .data	  : AT (ADDR (.text) + SIZEOF (.text))
  {
     PROVIDE (__data_start = .) ;
    /* --gc-sections will delete empty .data. This leads to wrong start
       addresses for subsequent sections because -Tdata= from the command
       line will have no effect, see PR13697.  Thus, keep .data  */
    KEEP (*(.data))
    *(.data*)
    *(.gnu.linkonce.d*)
    . = ALIGN(2);
     _edata = . ;
     PROVIDE (__data_end = .) ;
  }  > data
  .bss   : AT (ADDR (.bss))
  {
     PROVIDE (__bss_start = .) ;
    *(.bss)
    *(.bss*)
    *(COMMON)
     PROVIDE (__bss_end = .) ;
  }  > data
   __data_load_start = LOADADDR(.data);
   __data_load_end = __data_load_start + SIZEOF(.data);
  /* Global data not cleared after reset.  */
  .noinit  :
  {
     PROVIDE (__noinit_start = .) ;
    *(.noinit*)
     PROVIDE (__noinit_end = .) ;
     _end = . ;
     PROVIDE (__heap_start = .) ;
  }  > data
  .eeprom  :
  {
    KEEP (*(.eeprom*))
     __eeprom_end = . ;
  }  > eeprom
  .fuse  :
  {
    KEEP(*(.fuse))
    KEEP(*(.lfuse))
    KEEP(*(.hfuse))
    KEEP(*(.efuse))
  }  > fuse
  .lock  :
  {
    KEEP(*(.lock*))
  }  > lock
  .signature  :
  {
    KEEP(*(.signature*))
  }  > signature
  .user_signatures  :
  {
    KEEP(*(.user_signatures*))
  }  > user_signatures
  /* Stabs debugging sections.  */
  .stab 0 : { *(.stab) }
  .stabstr 0 : { *(.stabstr) }
  .stab.excl 0 : { *(.stab.excl) }
  .stab.exclstr 0 : { *(.stab.exclstr) }
  .stab.index 0 : { *(.stab.index) }
  .stab.indexstr 0 : { *(.stab.indexstr) }
  .comment 0 : { *(.comment) }
  /* DWARF debug sections.
     Symbols in the DWARF debugging sections are relative to the beginning
     of the section so we begin them at 0.  */
  /* DWARF 1 */
  .debug          0 : { *(.debug) }
  .line           0 : { *(.line) }
  /* GNU DWARF 1 extensions */
  .debug_srcinfo  0 : { *(.debug_srcinfo) }
  .debug_sfnames  0 : { *(.debug_sfnames) }
  /* DWARF 1.1 and DWARF 2 */
  .debug_aranges  0 : { *(.debug_aranges) }
  .debug_pubnames 0 : { *(.debug_pubnames) }
  /* DWARF 2 */
  .debug_info     0 : { *(.debug_info) *(.gnu.linkonce.wi.*) }
  .debug_abbrev   0 : { *(.debug_abbrev) }
  .debug_line     0 : { *(.debug_line) }
  .debug_frame    0 : { *(.debug_frame) }
  .debug_str      0 : { *(.debug_str) }
  .debug_loc      0 : { *(.debug_loc) }
  .debug_macinfo  0 : { *(.debug_macinfo) }
  /* DWARF 3 */
  .debug_pubtypes 0 : { *(.debug_pubtypes) }
  .debug_ranges   0 : { *(.debug_ranges) }
  /* DWARF Extension.  */
  .debug_macro    0 : { *(.debug_macro) }
}

The customization compared to the default avr-gcc linker script is as follows (it moves .rodata to .text):

--- default.ld	2017-07-15 15:20:01.762345763 +0800
+++ lookup-text.ld	2017-07-15 15:01:27.386390685 +0800
@@ -146,6 +146,8 @@
     *(.fini0)  /* Infinite loop after program termination.  */
     KEEP (*(.fini0))
      _etext = . ;
+    *(.rodata)  /* We need to include .rodata here if gcc is used */
+    *(.rodata*) /* with -fdata-sections.  */
   }  > text
   .data	  : AT (ADDR (.text) + SIZEOF (.text))
   {
@@ -155,8 +157,6 @@
        line will have no effect, see PR13697.  Thus, keep .data  */
     KEEP (*(.data))
     *(.data*)
-    *(.rodata)  /* We need to include .rodata here if gcc is used */
-    *(.rodata*) /* with -fdata-sections.  */
     *(.gnu.linkonce.d*)
     . = ALIGN(2);
      _edata = . ;

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-llvmAffects the LLVM AVR backendA-rustAffects overall Rust compiler architecture

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions