[compiler-rt] r291396 - [Builtins] [ARM] Adding Thumb1 support for fcmp
Weiming Zhao via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 8 10:51:12 PST 2017
Author: weimingz
Date: Sun Jan 8 12:51:12 2017
New Revision: 291396
URL: http://llvm.org/viewvc/llvm-project?rev=291396&view=rev
Log:
[Builtins] [ARM] Adding Thumb1 support for fcmp
Summary: Mainly translate IT block into cmp/branch for functions in comparesf2.S
Reviewers: rengolin, compnerd
Subscribers: aemerson, llvm-commits
Differential Revision: https://reviews.llvm.org/D28016
Modified:
compiler-rt/trunk/lib/builtins/arm/aeabi_fcmp.S
compiler-rt/trunk/lib/builtins/arm/comparesf2.S
Modified: compiler-rt/trunk/lib/builtins/arm/aeabi_fcmp.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/aeabi_fcmp.S?rev=291396&r1=291395&r2=291396&view=diff
==============================================================================
--- compiler-rt/trunk/lib/builtins/arm/aeabi_fcmp.S (original)
+++ compiler-rt/trunk/lib/builtins/arm/aeabi_fcmp.S Sun Jan 8 12:51:12 2017
@@ -26,10 +26,10 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp
bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \
cmp r0, #0 SEPARATOR \
b ## cond 1f SEPARATOR \
- mov r0, #0 SEPARATOR \
+ movs r0, #0 SEPARATOR \
pop { r4, pc } SEPARATOR \
1: SEPARATOR \
- mov r0, #1 SEPARATOR \
+ movs r0, #1 SEPARATOR \
pop { r4, pc } SEPARATOR \
END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond)
Modified: compiler-rt/trunk/lib/builtins/arm/comparesf2.S
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/builtins/arm/comparesf2.S?rev=291396&r1=291395&r2=291396&view=diff
==============================================================================
--- compiler-rt/trunk/lib/builtins/arm/comparesf2.S (original)
+++ compiler-rt/trunk/lib/builtins/arm/comparesf2.S Sun Jan 8 12:51:12 2017
@@ -47,27 +47,50 @@
DEFINE_COMPILERRT_FUNCTION(__eqsf2)
// Make copies of a and b with the sign bit shifted off the top. These will
// be used to detect zeros and NaNs.
+#if __ARM_ARCH_ISA_THUMB == 1
+ push {r6, lr}
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+#else
mov r2, r0, lsl #1
mov r3, r1, lsl #1
+#endif
// We do the comparison in three stages (ignoring NaN values for the time
// being). First, we orr the absolute values of a and b; this sets the Z
// flag if both a and b are zero (of either sign). The shift of r3 doesn't
// effect this at all, but it *does* make sure that the C flag is clear for
// the subsequent operations.
+#if __ARM_ARCH_ISA_THUMB == 1
+ lsrs r6, r3, #1
+ orrs r6, r2, r6
+#else
orrs r12, r2, r3, lsr #1
-
+#endif
// Next, we check if a and b have the same or different signs. If they have
// opposite signs, this eor will set the N flag.
+#if __ARM_ARCH_ISA_THUMB == 1
+ beq 1f
+ movs r6, r0
+ eors r6, r1
+1:
+#else
it ne
eorsne r12, r0, r1
+#endif
// If a and b are equal (either both zeros or bit identical; again, we're
// ignoring NaNs for now), this subtract will zero out r0. If they have the
// same sign, the flags are updated as they would be for a comparison of the
// absolute values of a and b.
+#if __ARM_ARCH_ISA_THUMB == 1
+ bmi 1f
+ subs r0, r2, r3
+1:
+#else
it pl
subspl r0, r2, r3
+#endif
// If a is smaller in magnitude than b and both have the same sign, place
// the negation of the sign of b in r0. Thus, if both are negative and
@@ -79,30 +102,69 @@ DEFINE_COMPILERRT_FUNCTION(__eqsf2)
// still clear from the shift argument in orrs; if a is positive and b
// negative, this places 0 in r0; if a is negative and b positive, -1 is
// placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+ bhs 1f
+ // Here if a and b have the same sign and absA < absB, the result is thus
+ // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan).
+ movs r0, #1
+ lsrs r1, #31
+ bne LOCAL_LABEL(CHECK_NAN)
+ negs r0, r0
+ b LOCAL_LABEL(CHECK_NAN)
+1:
+#else
it lo
mvnlo r0, r1, asr #31
+#endif
// If a is greater in magnitude than b and both have the same sign, place
// the sign of b in r0. Thus, if both are negative and a < b, -1 is placed
// in r0, which is the desired result. Conversely, if both are positive
// and a > b, zero is placed in r0.
+#if __ARM_ARCH_ISA_THUMB == 1
+ bls 1f
+ // Here both have the same sign and absA > absB.
+ movs r0, #1
+ lsrs r1, #31
+ beq LOCAL_LABEL(CHECK_NAN)
+ negs r0, r0
+1:
+#else
it hi
movhi r0, r1, asr #31
+#endif
// If you've been keeping track, at this point r0 contains -1 if a < b and
// 0 if a >= b. All that remains to be done is to set it to 1 if a > b.
// If a == b, then the Z flag is set, so we can get the correct final value
// into r0 by simply or'ing with 1 if Z is clear.
+ // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b.
+#if __ARM_ARCH_ISA_THUMB != 1
it ne
orrne r0, r0, #1
+#endif
// Finally, we need to deal with NaNs. If either argument is NaN, replace
// the value in r0 with 1.
+#if __ARM_ARCH_ISA_THUMB == 1
+LOCAL_LABEL(CHECK_NAN):
+ movs r6, #0xff
+ lsls r6, #24
+ cmp r2, r6
+ bhi 1f
+ cmp r3, r6
+1:
+ bls 2f
+ movs r0, #1
+2:
+ pop {r6, pc}
+#else
cmp r2, #0xff000000
ite ls
cmpls r3, #0xff000000
movhi r0, #1
JMP(lr)
+#endif
END_COMPILERRT_FUNCTION(__eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
@@ -111,11 +173,48 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__gtsf2)
// Identical to the preceding except in that we return -1 for NaN values.
- // Given that the two paths share so much code, one might be tempted to
+ // Given that the two paths share so much code, one might be tempted to
// unify them; however, the extra code needed to do so makes the code size
// to performance tradeoff very hard to justify for such small functions.
- mov r2, r0, lsl #1
- mov r3, r1, lsl #1
+#if __ARM_ARCH_ISA_THUMB == 1
+ push {r6, lr}
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+ lsrs r6, r3, #1
+ orrs r6, r2, r6
+ beq 1f
+ movs r6, r0
+ eors r6, r1
+1:
+ bmi 2f
+ subs r0, r2, r3
+2:
+ bhs 3f
+ movs r0, #1
+ lsrs r1, #31
+ bne LOCAL_LABEL(CHECK_NAN_2)
+ negs r0, r0
+ b LOCAL_LABEL(CHECK_NAN_2)
+3:
+ bls 4f
+ movs r0, #1
+ lsrs r1, #31
+ beq LOCAL_LABEL(CHECK_NAN_2)
+ negs r0, r0
+4:
+LOCAL_LABEL(CHECK_NAN_2):
+ movs r6, #0xff
+ lsls r6, #24
+ cmp r2, r6
+ bhi 5f
+ cmp r3, r6
+5:
+ bls 6f
+ movs r0, #1
+ negs r0, r0
+6:
+ pop {r6, pc}
+#else
orrs r12, r2, r3, lsr #1
it ne
eorsne r12, r0, r1
@@ -132,19 +231,32 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2)
cmpls r3, #0xff000000
movhi r0, #-1
JMP(lr)
+#endif
END_COMPILERRT_FUNCTION(__gtsf2)
DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__unordsf2)
// Return 1 for NaN values, 0 otherwise.
- mov r2, r0, lsl #1
- mov r3, r1, lsl #1
- mov r0, #0
+ lsls r2, r0, #1
+ lsls r3, r1, #1
+ movs r0, #0
+#if __ARM_ARCH_ISA_THUMB == 1
+ movs r1, #0xff
+ lsls r1, #24
+ cmp r2, r1
+ bhi 1f
+ cmp r3, r1
+1:
+ bls 2f
+ movs r0, #1
+2:
+#else
cmp r2, #0xff000000
ite ls
cmpls r3, #0xff000000
movhi r0, #1
+#endif
JMP(lr)
END_COMPILERRT_FUNCTION(__unordsf2)
More information about the llvm-commits
mailing list