diff options
author | Alexey Samsonov <samsonov@google.com> | 2014-02-14 09:20:33 +0000 |
---|---|---|
committer | Alexey Samsonov <samsonov@google.com> | 2014-02-14 09:20:33 +0000 |
commit | 53aa4fda49f94920139300227786ac47c393f1ce (patch) | |
tree | 6d022d04ee279fe1afd89668f346e28a9e3e1559 /lib/builtins/arm/udivsi3.S | |
parent | 6d999e478fecb10dc43f20b85385d25cc239db0a (diff) |
Move original compiler-rt functions (libgcc replacement) to lib/builtins directory
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@201393 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/builtins/arm/udivsi3.S')
-rw-r--r-- | lib/builtins/arm/udivsi3.S | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S new file mode 100644 index 000000000..11c1c096e --- /dev/null +++ b/lib/builtins/arm/udivsi3.S @@ -0,0 +1,148 @@ +/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivsi3 (32-bit unsigned integer divide) + * function for the ARM 32-bit architecture. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + + .syntax unified + + .text + .p2align 2 +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) +DEFINE_COMPILERRT_FUNCTION(__udivsi3) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + mov r3, r0 + udiv r0, r3, r1 + mls r1, r0, r1, r3 + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + JMPc(lr, eq) + cmp r0, r1 + movcc r0, #0 + JMPc(lr, cc) + /* + * Implement division using binary long division algorithm. + * + * r0 is the numerator, r1 the denominator. + * + * The code before JMP computes the correct shift I, so that + * r0 and (r1 << I) have the highest bit set in the same position. + * At the time of JMP, ip := .Ldiv0block - 12 * I. + * This depends on the fixed instruction size of block. + * + * block(shift) implements the test-and-update-quotient core. + * It assumes (r0 << shift) can be computed without overflow and + * that (r0 << shift) < 2 * r1. The quotient is stored in r3. + */ + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */ + sub r3, r3, ip + adr ip, LOCAL_LABEL(div0block) + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else + mov r2, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r2, #16 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(16 * 12) + + lsr r3, r2, #8 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(8 * 12) + + lsr r3, r2, #4 + cmp r3, r1 + movhs r2, r3 + subhs ip, #(4 * 12) + + lsr r3, r2, #2 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(2 * 12) + + /* Last block, no need to update r2 or r3. */ + cmp r1, r2, lsr #1 + subls ip, ip, #(1 * 12) + + mov r3, #0 + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + addhs r3, r3, IMM (1 << shift); \ + subhs r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + mov r0, r3 + JMP(lr) +#endif /* __ARM_ARCH_EXT_IDIV__ */ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__udivsi3) |