[llvm] 79dfe48 - [ARM] Set isCheapToSpeculateCtlz as true for hasV5TOps and no Thumb 1 (#154848)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 12:43:51 PDT 2025
Author: AZero13
Date: 2025-08-25T12:43:48-07:00
New Revision: 79dfe4886537795b22310cdfa941c67c78c67890
URL: https://github.com/llvm/llvm-project/commit/79dfe4886537795b22310cdfa941c67c78c67890
DIFF: https://github.com/llvm/llvm-project/commit/79dfe4886537795b22310cdfa941c67c78c67890.diff
LOG: [ARM] Set isCheapToSpeculateCtlz as true for hasV5TOps and no Thumb 1 (#154848)
This is so that we don't expand ctlz/cttz with unneeded checks for zero.
Also fix the logic error in LegalizerInfo so that G_CTLZ is NOT legal on
Thumb1 in GlobalISel.
Finally, remove the README entry regarding this issue.
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
llvm/lib/Target/ARM/README.txt
llvm/test/CodeGen/ARM/clz.ll
llvm/test/CodeGen/ARM/cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 40f636c49bf3d..4a158ef5bcae0 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21380,11 +21380,11 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
}
bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
- return Subtarget->hasV6T2Ops();
+ return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
}
bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
- return Subtarget->hasV6T2Ops();
+ return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
}
bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index fc12f050fa5a5..cdff649ecfa57 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -206,7 +206,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) : ST(ST) {
getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
- if (ST.hasV5TOps()) {
+ if (ST.hasV5TOps() && !ST.isThumb1Only()) {
getActionDefinitionsBuilder(G_CTLZ)
.legalFor({s32, s32})
.clampScalar(1, s32, s32)
diff --git a/llvm/lib/Target/ARM/README.txt b/llvm/lib/Target/ARM/README.txt
index def67cfae7277..ff84e07fa084a 100644
--- a/llvm/lib/Target/ARM/README.txt
+++ b/llvm/lib/Target/ARM/README.txt
@@ -697,22 +697,6 @@ target-neutral one.
//===---------------------------------------------------------------------===//
-Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins
-are specified to be undefined at zero, so portable code must check for zero
-and handle it as a special case. That is unnecessary on ARM where those
-operations are implemented in a way that is well-defined for zero. For
-example:
-
-int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; }
-
-should just be implemented with a CLZ instruction. Since there are other
-targets, e.g., PPC, that share this behavior, it would be best to implement
-this in a target-independent way: we should probably fold that (when using
-"undefined at zero" semantics) to set the "defined at zero" bit and have
-the code generator expand out the right code.
-
-//===---------------------------------------------------------------------===//
-
Clean up the test/MC/ARM files to have more robust register choices.
R0 should not be used as a register operand in the assembler tests as it's then
diff --git a/llvm/test/CodeGen/ARM/clz.ll b/llvm/test/CodeGen/ARM/clz.ll
index 0f49fbba11845..9e1e9f6ce6daa 100644
--- a/llvm/test/CodeGen/ARM/clz.ll
+++ b/llvm/test/CodeGen/ARM/clz.ll
@@ -1,12 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s -check-prefixes=CHECK,INLINE
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALL
declare i32 @llvm.ctlz.i32(i32, i1)
-define i32 @test(i32 %x) {
-; CHECK-LABEL: test
-; INLINE: clz r0, r0
-; LIBCALL: b __clzsi2
+define i32 @undef_zero(i32 %x) {
+; INLINE-LABEL: undef_zero:
+; INLINE: @ %bb.0:
+; INLINE-NEXT: clz r0, r0
+; INLINE-NEXT: bx lr
+;
+; LIBCALL-LABEL: undef_zero:
+; LIBCALL: @ %bb.0:
+; LIBCALL-NEXT: b __clzsi2
%tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
ret i32 %tmp.1
}
+
+define i32 @no_undef_zero(i32 %x) {
+; INLINE-LABEL: no_undef_zero:
+; INLINE: @ %bb.0:
+; INLINE-NEXT: clz r0, r0
+; INLINE-NEXT: bx lr
+;
+; LIBCALL-LABEL: no_undef_zero:
+; LIBCALL: @ %bb.0:
+; LIBCALL-NEXT: cmp r0, #0
+; LIBCALL-NEXT: moveq r0, #32
+; LIBCALL-NEXT: moveq pc, lr
+; LIBCALL-NEXT: .LBB1_1: @ %cond.false
+; LIBCALL-NEXT: .save {r11, lr}
+; LIBCALL-NEXT: push {r11, lr}
+; LIBCALL-NEXT: bl __clzsi2
+; LIBCALL-NEXT: pop {r11, lr}
+; LIBCALL-NEXT: mov pc, lr
+ %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 false )
+ ret i32 %tmp.1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll
index 1146ad64ee709..bf42e9f1104b4 100644
--- a/llvm/test/CodeGen/ARM/cttz.ll
+++ b/llvm/test/CodeGen/ARM/cttz.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple arm-eabi -mattr=+v5t | FileCheck %s --check-prefix=CHECK-5
; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s
; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s
; RUN: llc < %s -mtriple thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-6M
@@ -14,6 +15,15 @@ declare i64 @llvm.cttz.i64(i64, i1)
;------------------------------------------------------------------------------
define i8 @test_i8(i8 %a) {
+; CHECK-5-LABEL: test_i8:
+; CHECK-5: @ %bb.0:
+; CHECK-5-NEXT: orr r0, r0, #256
+; CHECK-5-NEXT: sub r1, r0, #1
+; CHECK-5-NEXT: bic r0, r1, r0
+; CHECK-5-NEXT: clz r0, r0
+; CHECK-5-NEXT: rsb r0, r0, #32
+; CHECK-5-NEXT: bx lr
+;
; CHECK-LABEL: test_i8:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, #256
@@ -81,6 +91,15 @@ define i8 @test_i8(i8 %a) {
}
define i16 @test_i16(i16 %a) {
+; CHECK-5-LABEL: test_i16:
+; CHECK-5: @ %bb.0:
+; CHECK-5-NEXT: orr r0, r0, #65536
+; CHECK-5-NEXT: sub r1, r0, #1
+; CHECK-5-NEXT: bic r0, r1, r0
+; CHECK-5-NEXT: clz r0, r0
+; CHECK-5-NEXT: rsb r0, r0, #32
+; CHECK-5-NEXT: bx lr
+;
; CHECK-LABEL: test_i16:
; CHECK: @ %bb.0:
; CHECK-NEXT: orr r0, r0, #65536
@@ -148,6 +167,14 @@ define i16 @test_i16(i16 %a) {
}
define i32 @test_i32(i32 %a) {
+; CHECK-5-LABEL: test_i32:
+; CHECK-5: @ %bb.0:
+; CHECK-5-NEXT: sub r1, r0, #1
+; CHECK-5-NEXT: bic r0, r1, r0
+; CHECK-5-NEXT: clz r0, r0
+; CHECK-5-NEXT: rsb r0, r0, #32
+; CHECK-5-NEXT: bx lr
+;
; CHECK-LABEL: test_i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r0, r0
@@ -207,6 +234,21 @@ define i32 @test_i32(i32 %a) {
}
define i64 @test_i64(i64 %a) {
+; CHECK-5-LABEL: test_i64:
+; CHECK-5: @ %bb.0:
+; CHECK-5-NEXT: sub r3, r1, #1
+; CHECK-5-NEXT: sub r2, r0, #1
+; CHECK-5-NEXT: bic r1, r3, r1
+; CHECK-5-NEXT: bic r2, r2, r0
+; CHECK-5-NEXT: clz r1, r1
+; CHECK-5-NEXT: clz r2, r2
+; CHECK-5-NEXT: rsb r1, r1, #64
+; CHECK-5-NEXT: cmp r0, #0
+; CHECK-5-NEXT: rsbne r1, r2, #32
+; CHECK-5-NEXT: mov r0, r1
+; CHECK-5-NEXT: mov r1, #0
+; CHECK-5-NEXT: bx lr
+;
; CHECK-LABEL: test_i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r1, r1
@@ -323,6 +365,14 @@ define i64 @test_i64(i64 %a) {
;------------------------------------------------------------------------------
define i8 @test_i8_zero_undef(i8 %a) {
+; CHECK-5-LABEL: test_i8_zero_undef:
+; CHECK-5: @ %bb.0:
+; CHECK-5-NEXT: sub r1, r0, #1
+; CHECK-5-NEXT: bic r0, r1, r0
+; CHECK-5-NEXT: clz r0, r0
+; CHECK-5-NEXT: rsb r0, r0, #32
+; CHECK-5-NEXT: bx lr
+;
; CHECK-LABEL: test_i8_zero_undef:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r0, r0
@@ -377,6 +427,14 @@ define i8 @test_i8_zero_undef(i8 %a) {
}
define i16 @test_i16_zero_undef(i16 %a) {
+; CHECK-5-LABEL: test_i16_zero_undef:
+; CHECK-5: @ %bb.0:
+; CHECK-5-NEXT: sub r1, r0, #1
+; CHECK-5-NEXT: bic r0, r1, r0
+; CHECK-5-NEXT: clz r0, r0
+; CHECK-5-NEXT: rsb r0, r0, #32
+; CHECK-5-NEXT: bx lr
+;
; CHECK-LABEL: test_i16_zero_undef:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r0, r0
@@ -432,6 +490,14 @@ define i16 @test_i16_zero_undef(i16 %a) {
define i32 @test_i32_zero_undef(i32 %a) {
+; CHECK-5-LABEL: test_i32_zero_undef:
+; CHECK-5: @ %bb.0:
+; CHECK-5-NEXT: sub r1, r0, #1
+; CHECK-5-NEXT: bic r0, r1, r0
+; CHECK-5-NEXT: clz r0, r0
+; CHECK-5-NEXT: rsb r0, r0, #32
+; CHECK-5-NEXT: bx lr
+;
; CHECK-LABEL: test_i32_zero_undef:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r0, r0
@@ -486,6 +552,21 @@ define i32 @test_i32_zero_undef(i32 %a) {
}
define i64 @test_i64_zero_undef(i64 %a) {
+; CHECK-5-LABEL: test_i64_zero_undef:
+; CHECK-5: @ %bb.0:
+; CHECK-5-NEXT: sub r3, r1, #1
+; CHECK-5-NEXT: sub r2, r0, #1
+; CHECK-5-NEXT: bic r1, r3, r1
+; CHECK-5-NEXT: bic r2, r2, r0
+; CHECK-5-NEXT: clz r1, r1
+; CHECK-5-NEXT: clz r2, r2
+; CHECK-5-NEXT: rsb r1, r1, #64
+; CHECK-5-NEXT: cmp r0, #0
+; CHECK-5-NEXT: rsbne r1, r2, #32
+; CHECK-5-NEXT: mov r0, r1
+; CHECK-5-NEXT: mov r1, #0
+; CHECK-5-NEXT: bx lr
+;
; CHECK-LABEL: test_i64_zero_undef:
; CHECK: @ %bb.0:
; CHECK-NEXT: rbit r1, r1
More information about the llvm-commits
mailing list