diff --git a/platform/mbed_lib.json b/platform/mbed_lib.json
index 8c1b1c9e5f0..9e672947d06 100644
--- a/platform/mbed_lib.json
+++ b/platform/mbed_lib.json
@@ -153,6 +153,18 @@
         "minimal-printf-set-floating-point-max-decimals": {
             "help": "Maximum number of decimals to be printed when using minimal printf library",
             "value": 6
+        },
+        "microlib-replace-64bit-multiply": {
+            "help": "Replace the microlib 64x64->64 multiplication routine with a faster version from the standard ARM library. Greatly increases performance and also saves ROM.",
+            "value": true
+        },
+        "microlib-replace-32bit-divide": {
+            "help": "Replace the microlib 32/32->32 division routines with faster versions from the standard ARM library. Greatly increases performance, at the cost of size. Turning this off may cause interrupt latency problems if interrupt routines perform 32-bit divisions.",
+            "value": true
+        },
+        "microlib-replace-64bit-divide": {
+            "help": "Replace the microlib 64/64->64 division routines with faster versions from the standard ARM library. Greatly increases performance, at the cost of size. Turning this off may cause interrupt latency problems if interrupt routines perform 64-bit divisions.",
+            "value": true
         }
     },
     "target_overrides": {
diff --git a/platform/source/TARGET_CORTEX_M/TOOLCHAIN_ARM_MICRO/TARGET_CORTEX_M_BASELINE/microlib_sub.S b/platform/source/TARGET_CORTEX_M/TOOLCHAIN_ARM_MICRO/TARGET_CORTEX_M_BASELINE/microlib_sub.S
new file mode 100644
index 00000000000..4e5cdf7c0bd
--- /dev/null
+++ b/platform/source/TARGET_CORTEX_M/TOOLCHAIN_ARM_MICRO/TARGET_CORTEX_M_BASELINE/microlib_sub.S
@@ -0,0 +1,595 @@
+;/*
+; * Copyright (c) 2020 ARM Limited. All rights reserved.
+; *
+; * SPDX-License-Identifier: Apache-2.0
+; *
+; * Licensed under the Apache License, Version 2.0 (the License); you may
+; * not use this file except in compliance with the License.
+; * You may obtain a copy of the License at
+; *
+; * www.apache.org/licenses/LICENSE-2.0
+; *
+; * Unless required by applicable law or agreed to in writing, software
+; * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+; * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; * See the License for the specific language governing permissions and
+; * limitations under the License.
+; *
+; * -----------------------------------------------------------------------------
+; *
+; * Title:  Cortex-M microlib replacements ( ARMv6-M, ARMv8-M Baseline )
+; *
+; * -----------------------------------------------------------------------------
+; */
+
+; Many of microlib's routines are very crude. Substitutes for the
+; poorly performing ones are provided here, extracted from the
+; standard ARM library.
+
+; We export all symbols as weak - this allows other mechanisms such
+; as code sharing to override these with their own $Sub$$ versions.
+
+#if MBED_CONF_PLATFORM_MICROLIB_REPLACE_64BIT_MULTIPLY
+        AREA |x$lmul|,CODE,READONLY,CODEALIGN
+
+; The microlib Thumb-1 64x64->64 multiply routine is both bigger and
+; slower. There is currently no downside to replacing it.
+ + EXPORT |$Sub$$__aeabi_lmul| [WEAK] + EXPORT |$Sub$$_ll_mul| [WEAK] +|$Sub$$_ll_mul| FUNCTION + ENDFUNC +|$Sub$$__aeabi_lmul| FUNCTION + MULS r3,r0,r3 + MULS r1,r2,r1 + PUSH {r4,r5,lr} + FRAME PUSH {r4,r5,lr} + ADDS r4,r3,r1 + LSRS r1,r0,#16 + LSRS r3,r2,#16 + MOV r5,r1 + UXTH r2,r2 + MULS r5,r3,r5 + UXTH r0,r0 + MULS r1,r2,r1 + ADDS r4,r5,r4 + MOV r5,r0 + MULS r5,r2,r5 + LSRS r2,r1,#16 + LSLS r1,r1,#16 + ADDS r5,r1,r5 + ADCS r2,r4 + MULS r0,r3,r0 + LSRS r1,r0,#16 + LSLS r0,r0,#16 + ADDS r0,r0,r5 + ADCS r1,r2 + POP {r4,r5,pc} + ENDFUNC +#endif // MBED_CONF_PLATFORM_MICROLIB_REPLACE_64BIT_MULTIPLY + +#if MBED_CONF_PLATFORM_MICROLIB_REPLACE_32BIT_DIVIDE + AREA |x$divmod|,CODE,READONLY,CODEALIGN + +; Microlib Thumb-1 32x32->32 divide routine is smaller, but +; slow - potentially enough to cause problems in IRQ handlers. +; This substitution is optional in case someone really +; wants the small version. + + EXPORT |$Sub$$__aeabi_uidiv| [WEAK] + EXPORT |$Sub$$__aeabi_uidivmod| [WEAK] + EXPORT |$Sub$$__aeabi_idiv| [WEAK] + EXPORT |$Sub$$__aeabi_idivmod| [WEAK] + EXTERN __aeabi_idiv0 +|$Sub$$__aeabi_uidiv| FUNCTION + ENDFUNC +|$Sub$$__aeabi_uidivmod| FUNCTION + MOVS r2,#0 + LSRS r3,r0,#4 + CMP r3,r1 + BCC try_shift3 + LSRS r3,r0,#8 + CMP r3,r1 + BCC try_shift7 + MOVS r3,#0 + MOV r12,r3 + B div_hard + ENDFUNC +|$Sub$$__aeabi_idiv| FUNCTION + ENDFUNC +|$Sub$$__aeabi_idivmod| FUNCTION + MOV r3,r0 + ORRS r3,r1 + BMI idiv_something_negative +; If both arguments to idiv are positive, then can use +; fast form. + MOVS r2,#0 + LSRS r3,r0,#1 + CMP r3,r1 + BCC try_shift0 + LSRS r3,r0,#4 + CMP r3,r1 + BCC try_shift3 + LSRS r3,r0,#8 + CMP r3,r1 + BCC try_shift7 + MOV r12,r2 + B div_hard +; Process up to 8-bit positive quotient without a loop +try_shift7 + LSRS r3,r0,#7 + CMP r3,r1 + BCC shift7_carry + LSLS r3,r1,#7 + SUBS r0,r0,r3 +shift7_carry + ADCS r2,r2 +try_shift6 + LSRS r3,r0,#6 + CMP r3,r1 + BCC shift6_carry + LSLS r3,r1,#6 + SUBS r0,r0,r3 +shift6_carry + ADCS r2,r2 +try_shift5 + LSRS r3,r0,#5 + CMP r3,r1 + BCC shift5_carry + LSLS r3,r1,#5 + SUBS r0,r0,r3 +shift5_carry + ADCS r2,r2 +try_shift4 + LSRS r3,r0,#4 + CMP r3,r1 + BCC shift4_carry + LSLS r3,r1,#4 + SUBS r0,r0,r3 +shift4_carry + ADCS r2,r2 +try_shift3 + LSRS r3,r0,#3 + CMP r3,r1 + BCC shift3_carry + LSLS r3,r1,#3 + SUBS r0,r0,r3 +shift3_carry + ADCS r2,r2 +try_shift2 + LSRS r3,r0,#2 + CMP r3,r1 + BCC shift2_carry + LSLS r3,r1,#2 + SUBS r0,r0,r3 +shift2_carry + ADCS r2,r2 +try_shift1 + LSRS r3,r0,#1 + CMP r3,r1 + BCC shift1_carry + LSLS r3,r1,#1 + SUBS r0,r0,r3 +shift1_carry + ADCS r2,r2 +try_shift0 + SUBS r1,r0,r1 + BCS shift0_carry + MOV r1,r0 +shift0_carry + ADCS r2,r2 + MOV r0,r2 + BX lr + B divbyzero + +idiv_something_negative +; Only loop form supports sign correction. Figure out +; adjustments for final sign correction, make operands +; positive, then enter loop + LSRS r2,r1,#31 + BEQ %F1 + RSBS r1,r1,#0 +01 ASRS r3,r0,#32 + BCC %F1 + RSBS r0,r0,#0 +01 EORS r3,r3,r2 + MOV r12,r3 + MOVS r2,#0 + LSRS r3,r0,#4 + CMP r3,r1 + BCC loop_try_shift3 + LSRS r3,r0,#8 + CMP r3,r1 + BCC loop_try_shift7 +; Full division loop, 6 bits at a time, then 2 final bits +; Run through precheck of 6 bits first, then enter loop. 
+div_hard + LSLS r1,r1,#6 + MOVS r2,#0xfc + REV r2,r2 + LSRS r3,r0,#8 + CMP r3,r1 + BCC loop_try_shift7 + LSLS r1,r1,#6 + ASRS r2,r2,#6 + CMP r3,r1 + BCC loop_try_shift7 + LSLS r1,r1,#6 + ASRS r2,r2,#6 + CMP r3,r1 + BCC loop_try_shift7 + LSLS r1,r1,#6 + BEQ hard_divbyzero + ASRS r2,r2,#6 + B loop_try_shift7 + +div_loop + LSRS r1,r1,#6 +loop_try_shift7 + LSRS r3,r0,#7 + CMP r3,r1 + BCC loop_shift7_carry + LSLS r3,r1,#7 + SUBS r0,r0,r3 +loop_shift7_carry + ADCS r2,r2 +loop_try_shift6 + LSRS r3,r0,#6 + CMP r3,r1 + BCC loop_shift6_carry + LSLS r3,r1,#6 + SUBS r0,r0,r3 +loop_shift6_carry + ADCS r2,r2 +loop_try_shift5 + LSRS r3,r0,#5 + CMP r3,r1 + BCC loop_shift5_carry + LSLS r3,r1,#5 + SUBS r0,r0,r3 +loop_shift5_carry + ADCS r2,r2 +loop_try_shift4 + LSRS r3,r0,#4 + CMP r3,r1 + BCC loop_shift4_carry + LSLS r3,r1,#4 + SUBS r0,r0,r3 +loop_shift4_carry + ADCS r2,r2 +loop_try_shift3 + LSRS r3,r0,#3 + CMP r3,r1 + BCC loop_shift3_carry + LSLS r3,r1,#3 + SUBS r0,r0,r3 +loop_shift3_carry + ADCS r2,r2 +loop_try_shift2 + LSRS r3,r0,#2 + CMP r3,r1 + BCC loop_shift2_carry + LSLS r3,r1,#2 + SUBS r0,r0,r3 +loop_shift2_carry + ADCS r2,r2 + BCS div_loop +loopend_try_shift1 + LSRS r3,r0,#1 + CMP r3,r1 + BCC loopend_shift1_carry + LSLS r3,r1,#1 + SUBS r0,r0,r3 +loopend_shift1_carry + ADCS r2,r2 + SUBS r1,r0,r1 + BCS loopend_shift0_carry + MOV r1,r0 +loopend_shift0_carry + ADCS r2,r2 + MOV r0,r2 + MOV r3,r12 + ASRS r3,r3,#1 + BCC %F01 + RSBS r0,r0,#0 + CMP r3,#0 +01 BPL %F01 + RSBS r1,r1,#0 +01 BX lr + +hard_divbyzero + MOV r3,r12 + ASRS r3,r3,#1 + BCC divbyzero + RSBS r0,r0,#0 +divbyzero + PUSH {r0,lr} + FRAME SAVE {lr},-4 + FRAME ADDRESS sp,8 + LDR r1,=0x7fffffff + CMP r0,#0 + BGT do_divbyzero_r1 + ADDS r1,r1,#1 + ANDS r0,r0,r1 + B do_divbyzero +do_divbyzero_r1 + MOV r0,r1 +do_divbyzero + BL __aeabi_idiv0 + POP {r1,pc} + ALIGN + ENDFUNC +#endif // MBED_CONF_PLATFORM_MICROLIB_REPLACE_32BIT_DIVIDE + +#if MBED_CONF_PLATFORM_MICROLIB_REPLACE_64BIT_DIVIDE + AREA |x$uldivmod|,CODE,READONLY,CODEALIGN + +; Microlib Thumb-1 64x64->64 divide routine is smaller, but +; extremely slow - enough to cause problems in IRQ handlers. +; This substitution is optional in case someone really +; wants the small version. 
+ + EXPORT |$Sub$$__aeabi_uldivmod| [WEAK] + EXPORT |$Sub$$_ll_udiv| [WEAK] + EXTERN __aeabi_ldiv0 + +|$Sub$$_ll_udiv| FUNCTION + ENDFUNC +|$Sub$$__aeabi_uldivmod| FUNCTION + PUSH {r1-r7,lr} + FRAME SAVE {r4-r7,lr},-20 + FRAME ADDRESS sp,32 + FRAME STATE REMEMBER + MOV r4,r0 + MOV r0,r2 + MOV r5,r1 + ORRS r0,r3 + BEQ ldivbyzero + MOV lr,r4 + MOV r12,r1 + MOVS r0,#0 + SUBS r4,r4,r2 + MOV r1,r0 + SBCS r5,r3 + BCC ldiv_return_remainder_r12_lr + MOV r6,r12 + MOVS r7,#0 + MOVS r4,#1 + MOV r5,r7 + SUBS r6,r6,r2 + SBCS r5,r3 + BCC triedshift32 + MOV r3,r2 + MOV r2,r7 + MOVS r4,#33 +triedshift32 + MOV r5,r12 + MOV r6,lr + LSLS r7,r5,#16 + LSRS r6,r6,#16 + ORRS r6,r7 + LSRS r5,r5,#16 + SUBS r6,r6,r2 + SBCS r5,r3 + BCC triedshift16 + LSRS r5,r2,#16 + LSLS r3,r3,#16 + ORRS r3,r5 + LSLS r2,r2,#16 + ADDS r4,r4,#16 +triedshift16 + MOV r5,r12 + MOV r6,lr + LSLS r7,r5,#24 + LSRS r6,r6,#8 + ORRS r6,r7 + LSRS r5,r5,#8 + SUBS r6,r6,r2 + SBCS r5,r3 + BCC triedshift8 + LSRS r5,r2,#24 + LSLS r3,r3,#8 + ORRS r3,r5 + LSLS r2,r2,#8 + ADDS r4,r4,#8 +triedshift8 + MOV r5,r12 + MOV r6,lr + LSLS r7,r5,#28 + LSRS r6,r6,#4 + ORRS r6,r7 + LSRS r5,r5,#4 + SUBS r6,r6,r2 + SBCS r5,r3 + BCC triedshift4 + LSRS r5,r2,#28 + LSLS r3,r3,#4 + ORRS r3,r5 + LSLS r2,r2,#4 + ADDS r4,r4,#4 +triedshift4 + MOV r5,r12 + MOV r6,lr + LSLS r7,r5,#30 + LSRS r6,r6,#2 + ORRS r6,r7 + LSRS r5,r5,#2 + SUBS r6,r6,r2 + SBCS r5,r3 + BCC triedshift2 + LSRS r5,r2,#30 + LSLS r3,r3,#2 + ORRS r3,r5 + LSLS r2,r2,#2 + ADDS r4,r4,#2 +triedshift2 + MOV r5,r12 + MOV r6,lr + LSLS r7,r5,#31 + LSRS r6,r6,#1 + ORRS r6,r7 + LSRS r5,r5,#1 + SUBS r6,r6,r2 + SBCS r5,r3 + BCC ldivloop_entry + ADDS r2,r2,r2 + ADCS r3,r3 + ADDS r4,r4,#1 + B ldivloop_entry +ldivloop + ADDS r0,r0,r0 + MOV r6,lr + MOV r5,r12 + ADCS r1,r1 + SUBS r7,r6,r2 + SBCS r5,r3 + STR r1,[sp,#4] + STR r0,[sp,#0] + BCC ldivloop_cc + MOV r0,r12 + SUBS r1,r6,r2 + SBCS r0,r3 + MOV lr,r1 + MOV r12,r0 + LDR r0,[sp,#0] + LDR r1,[sp,#4] + MOVS r5,#0 + ADDS r0,r0,#1 + ADCS r1,r5 +ldivloop_cc + LSLS r5,r3,#31 + LSRS r2,r2,#1 + ORRS r2,r5 + LSRS r3,r3,#1 +ldivloop_entry + SUBS r4,r4,#1 + BPL ldivloop +ldiv_return_remainder_r12_lr + MOV r2,lr + MOV r3,r12 +ldiv_return + ADD sp,#12 + FRAME POP 12 + POP {r4-r7,pc} + FRAME POP {r4-r7,pc} + B ldivbyzero ; Not clear what this instruction is doing in library - retained for diff check +ldivbyzero + FRAME STATE RESTORE + MOV r0,r4 + ORRS r0,r5 + BEQ ldivzerobyzero + MOVS r0,#0 + MVNS r0,r0 + MOV r1,r0 +call_ldiv0 + BL __aeabi_ldiv0 + MOV r2,r4 + MOV r3,r5 + B ldiv_return +ldivzerobyzero + MOVS r0,#0 + MOV r1,r0 + B call_ldiv0 + ENDFUNC + + AREA |x$sldivmod|,CODE,READONLY,CODEALIGN + + EXPORT |$Sub$$__aeabi_ldivmod| [WEAK] + EXPORT |$Sub$$_ll_sdiv| [WEAK] + IMPORT __aeabi_uldivmod +|$Sub$$_ll_sdiv| FUNCTION + ENDFUNC +|$Sub$$__aeabi_ldivmod| FUNCTION + PUSH {r4-r6,lr} + FRAME PUSH {r4-r6,lr} + MOV r5,r0 + MOV r0,r2 + MOV r4,r1 + ORRS r0,r3 + BEQ sdivbyzero + MOVS r1,#0 + MOV r0,r1 + SUBS r1,r1,r2 + SBCS r0,r3 + BGE sdivbynegative + MOVS r1,#0 + MOV r0,r1 + SUBS r1,r1,r5 + SBCS r0,r4 + BGE sdivnegativebypositive + MOV r0,r5 + MOV r1,r4 + BL __aeabi_uldivmod + POP {r4-r6,pc} +sdivbyzero + CMP r4,#0 + BGE sdivposbyzero + MOVS r3,#1 + MOVS r2,#0 + LSLS r3,r3,#31 + B sdivbyzero_out +sdivposbyzero + MOVS r1,#0 + MOV r0,r1 + SUBS r1,r1,r5 + SBCS r0,r4 + BGE sdivbyzero_out + MOVS r2,#0 + MVNS r2,r2 + LSRS r3,r2,#1 +sdivbyzero_out + MOV r0,r2 + MOV r1,r3 + BL __aeabi_ldiv0 + MOV r2,r5 + B sdiv_return_remainder_r4_r2 +sdivnegativebypositive + MOVS r1,#0 + RSBS r0,r5,#0 + SBCS r1,r4 + 
BL __aeabi_uldivmod + MOVS r4,#0 + RSBS r0,r0,#0 + SBCS r4,r1 + MOV r1,r4 + B sdiv_return_negateremainder +sdivbynegative + MOVS r1,#0 + MOV r0,r1 + SUBS r1,r1,r5 + SBCS r0,r4 + BGE sdivnegativebynegative +sdivpositivebynegative + MOV r1,r4 + MOVS r0,#0 + RSBS r2,r2,#0 + SBCS r0,r3 + MOV r3,r0 + MOV r0,r5 + BL __aeabi_uldivmod + MOVS r4,#0 + RSBS r0,r0,#0 + SBCS r4,r1 + MOV r1,r4 + POP {r4-r6,pc} +sdivnegativebynegative + MOVS r0,#0 + RSBS r2,r2,#0 + SBCS r0,r3 + MOVS r1,#0 + MOV r3,r0 + RSBS r0,r5,#0 + SBCS r1,r4 + BL __aeabi_uldivmod +sdiv_return_negateremainder + MOVS r4,#0 + RSBS r2,r2,#0 + SBCS r4,r3 +sdiv_return_remainder_r4_r2 + MOV r3,r4 + POP {r4-r6,pc} + ENDFUNC + +#endif // MBED_CONF_PLATFORM_MICROLIB_REPLACE_64BIT_DIVIDE + + END diff --git a/platform/source/TARGET_CORTEX_M/TOOLCHAIN_ARM_MICRO/TARGET_CORTEX_M_MAINLINE/microlib_sub.S b/platform/source/TARGET_CORTEX_M/TOOLCHAIN_ARM_MICRO/TARGET_CORTEX_M_MAINLINE/microlib_sub.S new file mode 100644 index 00000000000..90712ac2370 --- /dev/null +++ b/platform/source/TARGET_CORTEX_M/TOOLCHAIN_ARM_MICRO/TARGET_CORTEX_M_MAINLINE/microlib_sub.S @@ -0,0 +1,243 @@ +;/* +; * Copyright (c) 2020 ARM Limited. All rights reserved. +; * +; * SPDX-License-Identifier: Apache-2.0 +; * +; * Licensed under the Apache License, Version 2.0 (the License); you may +; * not use this file except in compliance with the License. +; * You may obtain a copy of the License at +; * +; * www.apache.org/licenses/LICENSE-2.0 +; * +; * Unless required by applicable law or agreed to in writing, software +; * distributed under the License is distributed on an AS IS BASIS, WITHOUT +; * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; * See the License for the specific language governing permissions and +; * limitations under the License. +; * +; * ----------------------------------------------------------------------------- +; * +; * Title: Cortex-M microlib replacements ( ARMv7-M, ARMv8-M Mainline ) +; * +; * ----------------------------------------------------------------------------- +; */ + +; Many of microlib's routines are very crude. Substitutes for +; poor performing ones are provided here, extracted from the +; standard ARM library. + +; We export all symbols as weak - this allows other mechanisms such +; as code sharing to override with their own $Sub$$ + +#if MBED_CONF_PLATFORM_MICROLIB_REPLACE_64BIT_MULTIPLY + AREA |x$lmul|,CODE,READONLY,CODEALIGN + +; Microlib Thumb-2 64x64->64 multiply routine is both bigger and +; slower. There is currently no downside to replacing it. The +; compiler will not normally use the library anyway. + + EXPORT |$Sub$$__aeabi_lmul| [WEAK] + EXPORT |$Sub$$_ll_mul| [WEAK] +|$Sub$$_ll_mul| FUNCTION + ENDFUNC +|$Sub$$__aeabi_lmul| FUNCTION + PUSH {lr} + FRAME PUSH {lr} + MOV lr,r0 + UMULL r0,r12,r2,lr + MLA r1,r2,r1,r12 + MLA r1,r3,lr,r1 + POP {pc} + ENDFUNC +#endif // MBED_CONF_PLATFORM_MICROLIB_REPLACE_64BIT_MULTIPLY + +#if MBED_CONF_PLATFORM_MICROLIB_REPLACE_32BIT_DIVIDE +; Microlib Thumb-2 32x32->32 divide routine is written +; as if it was just for Thumb-1. There is currently no +; downside to replacing it. The compiler will not +; normally use the library anyway. 
+ + AREA |x$sdiv|,CODE,READONLY,CODEALIGN + EXPORT |$Sub$$__aeabi_idiv| [WEAK] +|$Sub$$__aeabi_idiv| FUNCTION + SDIV r0,r0,r1 + BX lr + ENDFUNC + + AREA |x$sdivmod|,CODE,READONLY,CODEALIGN + EXPORT |$Sub$$__aeabi_idivmod| [WEAK] +|$Sub$$__aeabi_idivmod| FUNCTION + SDIV r2,r0,r1 + MLS r1,r2,r1,r0 + MOV r0,r2 + BX lr + ENDFUNC + + AREA |x$udiv|,CODE,READONLY,CODEALIGN + EXPORT |$Sub$$__aeabi_uidiv| [WEAK] +|$Sub$$__aeabi_uidiv| FUNCTION + UDIV r0,r0,r1 + BX lr + ENDFUNC + + AREA |x$udivmod|,CODE,READONLY,CODEALIGN + EXPORT |$Sub$$__aeabi_uidivmod| [WEAK] +|$Sub$$__aeabi_uidivmod| FUNCTION + UDIV r2,r0,r1 + MLS r1,r2,r1,r0 + MOV r0,r2 + BX lr + ENDFUNC + +#endif // MBED_CONF_PLATFORM_MICROLIB_REPLACE_32BIT_DIVIDE + +#if MBED_CONF_PLATFORM_MICROLIB_REPLACE_64BIT_DIVIDE + AREA |x$uldivmod|,CODE,READONLY,CODEALIGN + +; Microlib Thumb-2 64x64->64 divide routine is smaller, but +; extremely slow - enough to cause problems in IRQ handlers. +; This substitution is optional in case someone really +; wants the small version. + + EXPORT |$Sub$$__aeabi_uldivmod| [WEAK] + EXPORT |$Sub$$_ll_udiv| [WEAK] + EXTERN __aeabi_ldiv0 + +|$Sub$$_ll_udiv| FUNCTION + ENDFUNC +|$Sub$$__aeabi_uldivmod| FUNCTION + ORRS r12,r3,r2 + BEQ.W ldivbyzero ; BEQ.W to match library version for diff; BEQ.N is valid + PUSH {r4-r9,r11,lr} + FRAME PUSH {r4-r9,r11,lr} + MOV r6,#0 + CMP r3,#0 + CLZNE r5,r3 + LSLNE r4,r3,r5 + LSRNE r6,r4,r5 + EORNE r6,r6,r3 + ORRNE r6,r6,r2 + CLZEQ r5,r2 + LSLEQ r4,r2,r5 + RSB r5,r5,#32 + LSRNE r12,r2,r5 + ORRNE r4,r4,r12 + ADDNE r5,r5,#32 + ORRS r12,r6,r4,LSL #16 + LSR r4,r4,#16 + ADDNE r4,r4,#1 + MOV r8,#0 + MOV r9,#0 +ldivloop + CMP r0,r2 + SBCS r12,r1,r3 + BCC ldiv_return + CMP r1,#0 + CLZNE r7,r1 + LSLNE r6,r1,r7 + CLZEQ r7,r0 + LSLEQ r6,r0,r7 + RSB r7,r7,#32 + LSRNE r12,r0,r7 + ORRNE r6,r6,r12 + ADDNE r7,r7,#32 + UDIV r12,r6,r4 + SUB r7,r7,r5 + SUBS r7,r7,#16 + AND r11,r7,#31 + RSB r6,r11,#32 + LSR r6,r12,r6 + LSL r11,r12,r11 + MOVMI r11,r6 + MOVMI r6,#0 + CMP r7,#32 + MOVGE r6,r11 + MOVGE r11,#0 + ORRS r12,r11,r6 + MOVEQ r11,#1 + ADDS r9,r9,r11 + ADC r8,r8,r6 + UMULL r7,r12,r11,r2 + MLA r12,r6,r2,r12 + MLA r12,r11,r3,r12 + SUBS r0,r0,r7 + SBCS r1,r1,r12 + B ldivloop +ldiv_return + MOV r3,r1 + MOV r2,r0 + MOV r1,r8 + MOV r0,r9 + POP {r4-r9,r11,pc} + FRAME POP {r4-r9,r11,pc} + +ldivbyzero + PUSH {r0,r1,r4,lr} + FRAME PUSH {r4,lr},16 + ORRS r0,r0,r1 + MOVNE r0,#0xffffffff + MOV r1,r0 + BL __aeabi_ldiv0 + POP {r2-r4,lr} + FRAME POP {r4,lr},16 + BX lr + ENDFUNC + + AREA |x$sldivmod|,CODE,READONLY,CODEALIGN + + EXPORT |$Sub$$__aeabi_ldivmod| [WEAK] + EXPORT |$Sub$$_ll_sdiv| [WEAK] + IMPORT __aeabi_uldivmod +|$Sub$$_ll_sdiv| FUNCTION + ENDFUNC +|$Sub$$__aeabi_ldivmod| FUNCTION + ORRS r12,r3,r2 + BEQ sdivbyzero + PUSH {r4,lr} + FRAME PUSH {r4,lr} + ASRS r4,r1,#1 + EOR r4,r4,r3,LSR #1 + BPL sdivpositive + RSBS r0,r0,#0 + RSB r1,r1,#0 ; No RSC instruction in Thumb! 
+ SUBCC r1,r1,#1 +sdivpositive + TST r3,r3 + BPL sdivbypositive + RSBS r2,r2,#0 + RSB r3,r3,#0 + SUBCC r3,r3,#1 +sdivbypositive + BL __aeabi_uldivmod + TST r4,#0x40000000 + BEQ sdiv_quotient_correct + RSBS r0,r0,#0 + RSB r1,r1,#0 + SUBCC r1,r1,#1 +sdiv_quotient_correct + TST r4,#0x80000000 + BEQ sdiv_remainder_correct + RSBS r2,r2,#0 + RSB r3,r3,#0 + SUBCC r3,r3,#1 +sdiv_remainder_correct + POP {r4,pc} + FRAME POP {r4,pc} +sdivbyzero + PUSH {r0,r1,r4,lr} + FRAME PUSH {r4,lr},16 + RSBS r0,r0,#0 + MOV r0,#0 + SBCS r4,r0,r1 + MOVLT r0,#1 + AND r1,r1,#0x80000000 + CMP r0,#0 + MVNNE r1,#0x80000000 + MOVNE r0,#0xffffffff + BL __aeabi_ldiv0 + POP {r2-r4,pc} + ENDFUNC + +#endif // MBED_CONF_PLATFORM_MICROLIB_REPLACE_64BIT_DIVIDE + + END diff --git a/tools/targets/__init__.py b/tools/targets/__init__.py index 79d9819ade9..13a027c9c42 100644 --- a/tools/targets/__init__.py +++ b/tools/targets/__init__.py @@ -43,29 +43,24 @@ "CUMULATIVE_ATTRIBUTES", "get_resolution_order"] CORE_LABELS = { - "Cortex-M0": ["M0", "CORTEX_M", "LIKE_CORTEX_M0", "CORTEX"], - "Cortex-M0+": ["M0P", "CORTEX_M", "LIKE_CORTEX_M0", "CORTEX"], - "Cortex-M1": ["M1", "CORTEX_M", "LIKE_CORTEX_M1", "CORTEX"], - "Cortex-M3": ["M3", "CORTEX_M", "LIKE_CORTEX_M3", "CORTEX"], - "Cortex-M4": ["M4", "CORTEX_M", "RTOS_M4_M7", "LIKE_CORTEX_M4", "CORTEX"], - "Cortex-M4F": ["M4", "CORTEX_M", "RTOS_M4_M7", "LIKE_CORTEX_M4", "CORTEX"], - "Cortex-M7": ["M7", "CORTEX_M", "RTOS_M4_M7", "LIKE_CORTEX_M7", "CORTEX"], - "Cortex-M7F": ["M7", "CORTEX_M", "RTOS_M4_M7", "LIKE_CORTEX_M7", "CORTEX"], - "Cortex-M7FD": ["M7", "CORTEX_M", "RTOS_M4_M7", "LIKE_CORTEX_M7", - "CORTEX"], + "Cortex-M0": ["M0", "CORTEX_M", "CORTEX_M_BASELINE", "LIKE_CORTEX_M0", "CORTEX"], + "Cortex-M0+": ["M0P", "CORTEX_M", "CORTEX_M_BASELINE", "LIKE_CORTEX_M0", "CORTEX"], + "Cortex-M1": ["M1", "CORTEX_M", "CORTEX_M_BASELINE", "LIKE_CORTEX_M1", "CORTEX"], + "Cortex-M3": ["M3", "CORTEX_M", "CORTEX_M_MAINLINE", "LIKE_CORTEX_M3", "CORTEX"], + "Cortex-M4": ["M4", "CORTEX_M", "CORTEX_M_MAINLINE", "RTOS_M4_M7", "LIKE_CORTEX_M4", "CORTEX"], + "Cortex-M4F": ["M4", "CORTEX_M", "CORTEX_M_MAINLINE", "RTOS_M4_M7", "LIKE_CORTEX_M4", "CORTEX"], + "Cortex-M7": ["M7", "CORTEX_M", "CORTEX_M_MAINLINE", "RTOS_M4_M7", "LIKE_CORTEX_M7", "CORTEX"], + "Cortex-M7F": ["M7", "CORTEX_M", "CORTEX_M_MAINLINE", "RTOS_M4_M7", "LIKE_CORTEX_M7", "CORTEX"], + "Cortex-M7FD": ["M7", "CORTEX_M", "CORTEX_M_MAINLINE", "RTOS_M4_M7", "LIKE_CORTEX_M7", "CORTEX"], "Cortex-A9": ["A9", "CORTEX_A", "LIKE_CORTEX_A9", "CORTEX"], - "Cortex-M23": ["M23", "CORTEX_M", "LIKE_CORTEX_M23", "CORTEX"], - "Cortex-M23-NS": ["M23", "M23_NS", "CORTEX_M", "LIKE_CORTEX_M23", - "CORTEX"], - "Cortex-M33": ["M33", "CORTEX_M", "LIKE_CORTEX_M33", "CORTEX"], - "Cortex-M33-NS": ["M33", "M33_NS", "CORTEX_M", "LIKE_CORTEX_M33", - "CORTEX"], - "Cortex-M33F": ["M33", "CORTEX_M", "LIKE_CORTEX_M33", "CORTEX"], - "Cortex-M33F-NS": ["M33", "M33_NS", "CORTEX_M", "LIKE_CORTEX_M33", - "CORTEX"], - "Cortex-M33FE": ["M33", "CORTEX_M", "LIKE_CORTEX_M33", "CORTEX"], - "Cortex-M33FE-NS": ["M33", "M33_NS", "CORTEX_M", "LIKE_CORTEX_M33", - "CORTEX"] + "Cortex-M23": ["M23", "CORTEX_M", "CORTEX_M_BASELINE", "LIKE_CORTEX_M23", "CORTEX"], + "Cortex-M23-NS": ["M23", "M23_NS", "CORTEX_M_BASELINE", "CORTEX_M", "LIKE_CORTEX_M23", "CORTEX"], + "Cortex-M33": ["M33", "CORTEX_M", "CORTEX_M_MAINLINE", "LIKE_CORTEX_M33", "CORTEX"], + "Cortex-M33-NS": ["M33", "M33_NS", "CORTEX_M_MAINLINE", "CORTEX_M", "LIKE_CORTEX_M33", "CORTEX"], + "Cortex-M33F": ["M33", "CORTEX_M", 
"CORTEX_M_MAINLINE", "LIKE_CORTEX_M33", "CORTEX"], + "Cortex-M33F-NS": ["M33", "M33_NS", "CORTEX_M_MAINLINE", "CORTEX_M", "LIKE_CORTEX_M33", "CORTEX"], + "Cortex-M33FE": ["M33", "CORTEX_M", "CORTEX_M_MAINLINE", "LIKE_CORTEX_M33", "CORTEX"], + "Cortex-M33FE-NS": ["M33", "M33_NS", "CORTEX_M_MAINLINE", "CORTEX_M", "LIKE_CORTEX_M33", "CORTEX"] } CORE_ARCH = {