summaryrefslogtreecommitdiff
path: root/lib/builtins/arm
diff options
context:
space:
mode:
author Joerg Sonnenberger <joerg@bec.de> 2014-07-20 20:53:37 +0000
committer Joerg Sonnenberger <joerg@bec.de> 2014-07-20 20:53:37 +0000
commit e9471825794f3a8cb2c8fc06539fa863082590ac (patch)
tree 8dd63f3b100ecb5f064bb610fc1716f1b0afd846 /lib/builtins/arm
parent 344b05d7a377233d3842bb2eec5acaa4a5303cc5 (diff)
Redo THUMB support.
Discussed with and tested by: Saleem Abdulrasool

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@213481 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/builtins/arm')
-rw-r--r-- lib/builtins/arm/udivmodsi4.S 26
-rw-r--r-- lib/builtins/arm/udivsi3.S 28
-rw-r--r-- lib/builtins/arm/umodsi3.S 29
3 files changed, 76 insertions, 7 deletions
diff --git a/lib/builtins/arm/udivmodsi4.S b/lib/builtins/arm/udivmodsi4.S
index ddc875219..2bb65fedd 100644
--- a/lib/builtins/arm/udivmodsi4.S
+++ b/lib/builtins/arm/udivmodsi4.S
@@ -17,6 +17,18 @@
.syntax unified
.text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+#if __ARM_ARCH_ISA_THUMB == 2
+#define IT(cond) it cond
+#define ITT(cond) itt cond
+#else
+#define IT(cond)
+#define ITT(cond)
+#endif
+
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
@@ -42,6 +54,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 12 * I.
* This depends on the fixed instruction size of block.
+ * Each block is 12 bytes in ARM mode and 14 bytes in Thumb-2 mode, where the multiplier for I is 14 instead.
*
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
@@ -53,12 +66,20 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
sub r3, r3, ip
+# if __ARM_ARCH_ISA_THUMB == 2
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
adr ip, LOCAL_LABEL(div0block)
+# endif
sub ip, ip, r3, lsl #2
sub ip, ip, r3, lsl #3
mov r3, #0
bx ip
# else
+# if __ARM_ARCH_ISA_THUMB == 2
+# error THUMB mode requires CLZ or UDIV
+# endif
str r4, [sp, #-8]!
mov r4, r0
@@ -98,8 +119,9 @@ DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#define block(shift) \
cmp r0, r1, lsl IMM shift; \
- addhs r3, r3, IMM (1 << shift); \
- subhs r0, r0, r1, lsl IMM shift
+ ITT(hs); \
+ addhs.w r3, r3, IMM (1 << shift); \
+ subhs.w r0, r0, r1, lsl IMM shift
block(31)
block(30)
diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S
index 8fb1dca0f..19fea56a2 100644
--- a/lib/builtins/arm/udivsi3.S
+++ b/lib/builtins/arm/udivsi3.S
@@ -17,6 +17,18 @@
.syntax unified
.text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+#if __ARM_ARCH_ISA_THUMB == 2
+#define IT(cond) it cond
+#define ITT(cond) itt cond
+#else
+#define IT(cond)
+#define ITT(cond)
+#endif
+
.p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
DEFINE_COMPILERRT_FUNCTION(__udivsi3)
@@ -30,8 +42,10 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#else
cmp r1, #1
bcc LOCAL_LABEL(divby0)
+ IT(eq)
JMPc(lr, eq)
cmp r0, r1
+ ITT(cc)
movcc r0, #0
JMPc(lr, cc)
/*
@@ -43,6 +57,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 12 * I.
* This depends on the fixed instruction size of block.
+ * Each block is 12 bytes in ARM mode and 14 bytes in Thumb-2 mode, where the multiplier for I is 14 instead.
*
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
@@ -54,12 +69,20 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
sub r3, r3, ip
+# if __ARM_ARCH_ISA_THUMB == 2
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
adr ip, LOCAL_LABEL(div0block)
+# endif
sub ip, ip, r3, lsl #2
sub ip, ip, r3, lsl #3
mov r3, #0
bx ip
# else
+# if __ARM_ARCH_ISA_THUMB == 2
+# error THUMB mode requires CLZ or UDIV
+# endif
mov r2, r0
adr ip, LOCAL_LABEL(div0block)
@@ -96,8 +119,9 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#define block(shift) \
cmp r0, r1, lsl IMM shift; \
- addhs r3, r3, IMM (1 << shift); \
- subhs r0, r0, r1, lsl IMM shift
+ ITT(hs); \
+ addhs.w r3, r3, IMM (1 << shift); \
+ subhs.w r0, r0, r1, lsl IMM shift
block(31)
block(30)
diff --git a/lib/builtins/arm/umodsi3.S b/lib/builtins/arm/umodsi3.S
index 164646b1f..e81af0c14 100644
--- a/lib/builtins/arm/umodsi3.S
+++ b/lib/builtins/arm/umodsi3.S
@@ -16,6 +16,17 @@
.syntax unified
.text
+#if __ARM_ARCH_ISA_THUMB == 2
+ .thumb
+#endif
+
+#if __ARM_ARCH_ISA_THUMB == 2
+#define IT(cond) it cond
+#define ITT(cond) itt cond
+#else
+#define IT(cond)
+#define ITT(cond)
+#endif
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
@@ -30,9 +41,11 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#else
cmp r1, #1
bcc LOCAL_LABEL(divby0)
+ ITT(eq)
moveq r0, #0
JMPc(lr, eq)
cmp r0, r1
+ IT(cc)
JMPc(lr, cc)
/*
* Implement division using binary long division algorithm.
@@ -43,6 +56,7 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
* r0 and (r1 << I) have the highest bit set in the same position.
* At the time of JMP, ip := .Ldiv0block - 8 * I.
* This depends on the fixed instruction size of block.
+ * Each block is 8 bytes in ARM mode and 10 bytes in Thumb-2 mode, where the multiplier for I is 10 instead.
*
* block(shift) implements the test-and-update-quotient core.
* It assumes (r0 << shift) can be computed without overflow and
@@ -54,10 +68,18 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
clz r3, r1
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
sub r3, r3, ip
+# if __ARM_ARCH_ISA_THUMB == 2
+ adr ip, LOCAL_LABEL(div0block) + 1
+ sub ip, ip, r3, lsl #1
+# else
adr ip, LOCAL_LABEL(div0block)
+# endif
sub ip, ip, r3, lsl #3
bx ip
# else
+# if __ARM_ARCH_ISA_THUMB == 2
+# error THUMB mode requires CLZ or UDIV
+# endif
mov r2, r0
adr ip, LOCAL_LABEL(div0block)
@@ -90,9 +112,10 @@ DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#define IMM #
-#define block(shift) \
- cmp r0, r1, lsl IMM shift; \
- subhs r0, r0, r1, lsl IMM shift
+#define block(shift) \
+ cmp r0, r1, lsl IMM shift; \
+ IT(hs); \
+ subhs.w r0, r0, r1, lsl IMM shift
block(31)
block(30)