[PATCH] compiler-rt: allow ARM assembly to compile in Thumb mode
Tim Northover
t.p.northover at gmail.com
Mon Nov 11 10:36:08 PST 2013
Hi,
I'm working on building some more variants of compiler-rt, for
Cortex-M CPUs in particular. Since those CPUs don't support ARM mode,
we want the optimised functions to be compilable as Thumb2.
Fortunately, UAL syntax allows this, and the only change needed (I
think) is to add IT instructions before any conditionally executed
instructions. When assembling in ARM mode these are effectively
ignored: the assembler checks them for consistency with the
instructions that follow (a quality-of-implementation matter), but
they generate no code.
The attached patch adds all of these, I believe. OK to commit?
Cheers.
Tim.
-------------- next part --------------
commit 05ebbc55485fd53775cfe925a0481feec6ba20b9
Author: Tim Northover <tnorthover at apple.com>
Date: Mon Nov 11 09:56:34 2013 -0800
ARM: make assembly files compile as Thumb2 by adding IT blocks (no-ops in ARM mode).
ARM's UAL syntax allows the same assembly file to be assembled in both ARM and
Thumb mode. Conditional execution is handled by requiring Thumb IT blocks,
which are essentially ignored when assembling for ARM.
diff --git a/lib/arm/comparesf2.S b/lib/arm/comparesf2.S
index ee18203..ce6f4b9 100644
--- a/lib/arm/comparesf2.S
+++ b/lib/arm/comparesf2.S
@@ -59,12 +59,14 @@ DEFINE_COMPILERRT_FUNCTION(__nesf2)
// Next, we check if a and b have the same or different signs. If they have
// opposite signs, this eor will set the N flag.
+ it ne
eorsne r12, r0, r1
// If a and b are equal (either both zeros or bit identical; again, we're
// ignoring NaNs for now), this subtract will zero out r0. If they have the
// same sign, the flags are updated as they would be for a comparison of the
// absolute values of a and b.
+ it pl
subspl r0, r2, r3
// If a is smaller in magnitude than b and both have the same sign, place
@@ -77,23 +79,27 @@ DEFINE_COMPILERRT_FUNCTION(__nesf2)
// still clear from the shift argument in orrs; if a is positive and b
// negative, this places 0 in r0; if a is negative and b positive, -1 is
// placed in r0.
+ it lo
mvnlo r0, r1, asr #31
// If a is greater in magnitude than b and both have the same sign, place
// the sign of b in r0. Thus, if both are negative and a < b, -1 is placed
// in r0, which is the desired result. Conversely, if both are positive
// and a > b, zero is placed in r0.
+ it hi
movhi r0, r1, asr #31
// If you've been keeping track, at this point r0 contains -1 if a < b and
// 0 if a >= b. All that remains to be done is to set it to 1 if a > b.
// If a == b, then the Z flag is set, so we can get the correct final value
// into r0 by simply or'ing with 1 if Z is clear.
- orrne r0, r0, #1
+ it ne
+ orrne r0, r0, #1
// Finally, we need to deal with NaNs. If either argument is NaN, replace
// the value in r0 with 1.
cmp r2, #0xff000000
+ ite ls
cmpls r3, #0xff000000
movhi r0, #1
bx lr
@@ -108,12 +114,18 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2)
mov r2, r0, lsl #1
mov r3, r1, lsl #1
orrs r12, r2, r3, lsr #1
+ it ne
eorsne r12, r0, r1
+ it pl
subspl r0, r2, r3
+ it lo
mvnlo r0, r1, asr #31
+ it hi
movhi r0, r1, asr #31
- orrne r0, r0, #1
+ it ne
+ orrne r0, r0, #1
cmp r2, #0xff000000
+ ite ls
cmpls r3, #0xff000000
movhi r0, #-1
bx lr
@@ -125,6 +137,7 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2)
mov r3, r1, lsl #1
mov r0, #0
cmp r2, #0xff000000
+ ite ls
cmpls r3, #0xff000000
movhi r0, #1
bx lr
diff --git a/lib/arm/switch16.S b/lib/arm/switch16.S
index e8f08c4..9c3f0cf 100644
--- a/lib/arm/switch16.S
+++ b/lib/arm/switch16.S
@@ -34,8 +34,9 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16)
ldrh ip, [lr, #-1] // get first 16-bit word in table
cmp r0, ip // compare with index
add r0, lr, r0, lsl #1 // compute address of element in table
- ldrshcc r0, [r0, #1] // load 16-bit element if r0 is in range
add ip, lr, ip, lsl #1 // compute address of last element in table
+ ite lo
+ ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range
ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range
add ip, lr, r0, lsl #1 // compute label = lr + element*2
bx ip // jump to computed label
diff --git a/lib/arm/switch32.S b/lib/arm/switch32.S
index 7008fcc..3152dfa 100644
--- a/lib/arm/switch32.S
+++ b/lib/arm/switch32.S
@@ -34,9 +34,10 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32)
ldr ip, [lr, #-1] // get first 32-bit word in table
cmp r0, ip // compare with index
add r0, lr, r0, lsl #2 // compute address of element in table
- ldrcc r0, [r0, #3] // load 32-bit element if r0 is in range
add ip, lr, ip, lsl #2 // compute address of last element in table
- ldrcs r0, [ip, #3] // load 32-bit element if r0 out of range
+ ite lo
+ ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range
+ ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range
add ip, lr, r0 // compute label = lr + element
bx ip // jump to computed label
diff --git a/lib/arm/switch8.S b/lib/arm/switch8.S
index e784b40..15729eb 100644
--- a/lib/arm/switch8.S
+++ b/lib/arm/switch8.S
@@ -33,7 +33,8 @@
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8)
ldrb ip, [lr, #-1] // get first byte in table
cmp r0, ip // signed compare with index
- ldrsbcc r0, [lr, r0] // get indexed byte out of table
+ ite lo
+ ldrsblo r0, [lr, r0] // get indexed byte out of table
ldrsbhs r0, [lr, ip] // if out of range, use last entry in table
add ip, lr, r0, lsl #1 // compute label = lr + element*2
bx ip // jump to computed label
diff --git a/lib/arm/switchu8.S b/lib/arm/switchu8.S
index 19bed2f..0a4efac 100644
--- a/lib/arm/switchu8.S
+++ b/lib/arm/switchu8.S
@@ -33,7 +33,8 @@
DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8)
ldrb ip, [lr, #-1] // get first byte in table
cmp r0, ip // compare with index
- ldrbcc r0, [lr, r0] // get indexed byte out of table
+ ite lo
+ ldrblo r0, [lr, r0] // get indexed byte out of table
ldrbhs r0, [lr, ip] // if out of range, use last entry in table
add ip, lr, r0, lsl #1 // compute label = lr + element*2
bx ip // jump to computed label
diff --git a/lib/arm/udivmodsi4.S b/lib/arm/udivmodsi4.S
index 5fe53fe..aee2776 100644
--- a/lib/arm/udivmodsi4.S
+++ b/lib/arm/udivmodsi4.S
@@ -74,14 +74,17 @@ LOCAL_LABEL(mainLoop):
// this way, we can merge the two branches which is a substantial win for
// such a tight loop on current ARM architectures.
subs r, a, b, lsl i
+ itt hs
orrhs q, q,one, lsl i
movhs a, r
+ it ne
subsne i, i, #1
bhi LOCAL_LABEL(mainLoop)
// Do the final test subtraction and update of quotient (i == 0), as it is
// not performed in the main loop.
subs r, a, b
+ itt hs
orrhs q, #1
movhs a, r
diff --git a/lib/arm/udivsi3.S b/lib/arm/udivsi3.S
index 1c15825..2bb1412 100644
--- a/lib/arm/udivsi3.S
+++ b/lib/arm/udivsi3.S
@@ -73,14 +73,17 @@ LOCAL_LABEL(mainLoop):
// this way, we can merge the two branches which is a substantial win for
// such a tight loop on current ARM architectures.
subs r, a, b, lsl i
+ itt hs
orrhs q, q,one, lsl i
movhs a, r
+ it ne
subsne i, i, #1
bhi LOCAL_LABEL(mainLoop)
// Do the final test subtraction and update of quotient (i == 0), as it is
// not performed in the main loop.
subs r, a, b
+ it hs
orrhs q, #1
LOCAL_LABEL(return):
diff --git a/lib/arm/umodsi3.S b/lib/arm/umodsi3.S
index 188edf3..092a4f1 100644
--- a/lib/arm/umodsi3.S
+++ b/lib/arm/umodsi3.S
@@ -57,13 +57,16 @@ LOCAL_LABEL(mainLoop):
// this way, we can merge the two branches which is a substantial win for
// such a tight loop on current ARM architectures.
subs r, a, b, lsl i
+ it hs
movhs a, r
+ it ne
subsne i, i, #1
bhi LOCAL_LABEL(mainLoop)
// Do the final test subtraction and update of remainder (i == 0), as it is
// not performed in the main loop.
subs r, a, b
+ it hs
movhs a, r
bx lr
#endif
More information about the llvm-commits mailing list