[llvm] 79dfe48 - [ARM] Set isCheapToSpeculateCtlz as true for hasV5TOps and no Thumb 1 (#154848)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 25 12:43:51 PDT 2025


Author: AZero13
Date: 2025-08-25T12:43:48-07:00
New Revision: 79dfe4886537795b22310cdfa941c67c78c67890

URL: https://github.com/llvm/llvm-project/commit/79dfe4886537795b22310cdfa941c67c78c67890
DIFF: https://github.com/llvm/llvm-project/commit/79dfe4886537795b22310cdfa941c67c78c67890.diff

LOG: [ARM] Set isCheapToSpeculateCtlz as true for hasV5TOps and no Thumb 1 (#154848)

This is so that ctlz/cttz are not expanded with unneeded zero checks.

Also fix the logic error in ARMLegalizerInfo so that G_CTLZ is NOT legal on
Thumb1 in GlobalISel.

Finally, remove the README entry regarding this issue.

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
    llvm/lib/Target/ARM/README.txt
    llvm/test/CodeGen/ARM/clz.ll
    llvm/test/CodeGen/ARM/cttz.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 40f636c49bf3d..4a158ef5bcae0 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -21380,11 +21380,11 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
 }
 
 bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
-  return Subtarget->hasV6T2Ops();
+  return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
 }
 
 bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
-  return Subtarget->hasV6T2Ops();
+  return Subtarget->hasV5TOps() && !Subtarget->isThumb1Only();
 }
 
 bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(

diff  --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index fc12f050fa5a5..cdff649ecfa57 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -206,7 +206,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) : ST(ST) {
 
   getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
 
-  if (ST.hasV5TOps()) {
+  if (ST.hasV5TOps() && !ST.isThumb1Only()) {
     getActionDefinitionsBuilder(G_CTLZ)
         .legalFor({s32, s32})
         .clampScalar(1, s32, s32)

diff  --git a/llvm/lib/Target/ARM/README.txt b/llvm/lib/Target/ARM/README.txt
index def67cfae7277..ff84e07fa084a 100644
--- a/llvm/lib/Target/ARM/README.txt
+++ b/llvm/lib/Target/ARM/README.txt
@@ -697,22 +697,6 @@ target-neutral one.
 
 //===---------------------------------------------------------------------===//
 
-Optimize unnecessary checks for zero with __builtin_clz/ctz.  Those builtins
-are specified to be undefined at zero, so portable code must check for zero
-and handle it as a special case.  That is unnecessary on ARM where those
-operations are implemented in a way that is well-defined for zero.  For
-example:
-
-int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; }
-
-should just be implemented with a CLZ instruction.  Since there are other
-targets, e.g., PPC, that share this behavior, it would be best to implement
-this in a target-independent way: we should probably fold that (when using
-"undefined at zero" semantics) to set the "defined at zero" bit and have
-the code generator expand out the right code.
-
-//===---------------------------------------------------------------------===//
-
 Clean up the test/MC/ARM files to have more robust register choices.
 
 R0 should not be used as a register operand in the assembler tests as it's then

diff  --git a/llvm/test/CodeGen/ARM/clz.ll b/llvm/test/CodeGen/ARM/clz.ll
index 0f49fbba11845..9e1e9f6ce6daa 100644
--- a/llvm/test/CodeGen/ARM/clz.ll
+++ b/llvm/test/CodeGen/ARM/clz.ll
@@ -1,12 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s -check-prefixes=CHECK,INLINE
 ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALL
 
 declare i32 @llvm.ctlz.i32(i32, i1)
 
-define i32 @test(i32 %x) {
-; CHECK-LABEL: test
-; INLINE: clz r0, r0
-; LIBCALL: b __clzsi2
+define i32 @undef_zero(i32 %x) {
+; INLINE-LABEL: undef_zero:
+; INLINE:       @ %bb.0:
+; INLINE-NEXT:    clz r0, r0
+; INLINE-NEXT:    bx lr
+;
+; LIBCALL-LABEL: undef_zero:
+; LIBCALL:       @ %bb.0:
+; LIBCALL-NEXT:    b __clzsi2
         %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
         ret i32 %tmp.1
 }
+
+define i32 @no_undef_zero(i32 %x) {
+; INLINE-LABEL: no_undef_zero:
+; INLINE:       @ %bb.0:
+; INLINE-NEXT:    clz r0, r0
+; INLINE-NEXT:    bx lr
+;
+; LIBCALL-LABEL: no_undef_zero:
+; LIBCALL:       @ %bb.0:
+; LIBCALL-NEXT:    cmp r0, #0
+; LIBCALL-NEXT:    moveq r0, #32
+; LIBCALL-NEXT:    moveq pc, lr
+; LIBCALL-NEXT:  .LBB1_1: @ %cond.false
+; LIBCALL-NEXT:    .save {r11, lr}
+; LIBCALL-NEXT:    push {r11, lr}
+; LIBCALL-NEXT:    bl __clzsi2
+; LIBCALL-NEXT:    pop {r11, lr}
+; LIBCALL-NEXT:    mov pc, lr
+        %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 false )
+        ret i32 %tmp.1
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

diff  --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll
index 1146ad64ee709..bf42e9f1104b4 100644
--- a/llvm/test/CodeGen/ARM/cttz.ll
+++ b/llvm/test/CodeGen/ARM/cttz.ll
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple arm-eabi -mattr=+v5t | FileCheck %s --check-prefix=CHECK-5
 ; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 | FileCheck %s
 ; RUN: llc < %s -mtriple arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -mtriple thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-6M
@@ -14,6 +15,15 @@ declare i64 @llvm.cttz.i64(i64, i1)
 ;------------------------------------------------------------------------------
 
 define i8 @test_i8(i8 %a) {
+; CHECK-5-LABEL: test_i8:
+; CHECK-5:       @ %bb.0:
+; CHECK-5-NEXT:    orr r0, r0, #256
+; CHECK-5-NEXT:    sub r1, r0, #1
+; CHECK-5-NEXT:    bic r0, r1, r0
+; CHECK-5-NEXT:    clz r0, r0
+; CHECK-5-NEXT:    rsb r0, r0, #32
+; CHECK-5-NEXT:    bx lr
+;
 ; CHECK-LABEL: test_i8:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    orr r0, r0, #256
@@ -81,6 +91,15 @@ define i8 @test_i8(i8 %a) {
 }
 
 define i16 @test_i16(i16 %a) {
+; CHECK-5-LABEL: test_i16:
+; CHECK-5:       @ %bb.0:
+; CHECK-5-NEXT:    orr r0, r0, #65536
+; CHECK-5-NEXT:    sub r1, r0, #1
+; CHECK-5-NEXT:    bic r0, r1, r0
+; CHECK-5-NEXT:    clz r0, r0
+; CHECK-5-NEXT:    rsb r0, r0, #32
+; CHECK-5-NEXT:    bx lr
+;
 ; CHECK-LABEL: test_i16:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    orr r0, r0, #65536
@@ -148,6 +167,14 @@ define i16 @test_i16(i16 %a) {
 }
 
 define i32 @test_i32(i32 %a) {
+; CHECK-5-LABEL: test_i32:
+; CHECK-5:       @ %bb.0:
+; CHECK-5-NEXT:    sub r1, r0, #1
+; CHECK-5-NEXT:    bic r0, r1, r0
+; CHECK-5-NEXT:    clz r0, r0
+; CHECK-5-NEXT:    rsb r0, r0, #32
+; CHECK-5-NEXT:    bx lr
+;
 ; CHECK-LABEL: test_i32:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    rbit r0, r0
@@ -207,6 +234,21 @@ define i32 @test_i32(i32 %a) {
 }
 
 define i64 @test_i64(i64 %a) {
+; CHECK-5-LABEL: test_i64:
+; CHECK-5:       @ %bb.0:
+; CHECK-5-NEXT:    sub r3, r1, #1
+; CHECK-5-NEXT:    sub r2, r0, #1
+; CHECK-5-NEXT:    bic r1, r3, r1
+; CHECK-5-NEXT:    bic r2, r2, r0
+; CHECK-5-NEXT:    clz r1, r1
+; CHECK-5-NEXT:    clz r2, r2
+; CHECK-5-NEXT:    rsb r1, r1, #64
+; CHECK-5-NEXT:    cmp r0, #0
+; CHECK-5-NEXT:    rsbne r1, r2, #32
+; CHECK-5-NEXT:    mov r0, r1
+; CHECK-5-NEXT:    mov r1, #0
+; CHECK-5-NEXT:    bx lr
+;
 ; CHECK-LABEL: test_i64:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    rbit r1, r1
@@ -323,6 +365,14 @@ define i64 @test_i64(i64 %a) {
 ;------------------------------------------------------------------------------
 
 define i8 @test_i8_zero_undef(i8 %a) {
+; CHECK-5-LABEL: test_i8_zero_undef:
+; CHECK-5:       @ %bb.0:
+; CHECK-5-NEXT:    sub r1, r0, #1
+; CHECK-5-NEXT:    bic r0, r1, r0
+; CHECK-5-NEXT:    clz r0, r0
+; CHECK-5-NEXT:    rsb r0, r0, #32
+; CHECK-5-NEXT:    bx lr
+;
 ; CHECK-LABEL: test_i8_zero_undef:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    rbit r0, r0
@@ -377,6 +427,14 @@ define i8 @test_i8_zero_undef(i8 %a) {
 }
 
 define i16 @test_i16_zero_undef(i16 %a) {
+; CHECK-5-LABEL: test_i16_zero_undef:
+; CHECK-5:       @ %bb.0:
+; CHECK-5-NEXT:    sub r1, r0, #1
+; CHECK-5-NEXT:    bic r0, r1, r0
+; CHECK-5-NEXT:    clz r0, r0
+; CHECK-5-NEXT:    rsb r0, r0, #32
+; CHECK-5-NEXT:    bx lr
+;
 ; CHECK-LABEL: test_i16_zero_undef:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    rbit r0, r0
@@ -432,6 +490,14 @@ define i16 @test_i16_zero_undef(i16 %a) {
 
 
 define i32 @test_i32_zero_undef(i32 %a) {
+; CHECK-5-LABEL: test_i32_zero_undef:
+; CHECK-5:       @ %bb.0:
+; CHECK-5-NEXT:    sub r1, r0, #1
+; CHECK-5-NEXT:    bic r0, r1, r0
+; CHECK-5-NEXT:    clz r0, r0
+; CHECK-5-NEXT:    rsb r0, r0, #32
+; CHECK-5-NEXT:    bx lr
+;
 ; CHECK-LABEL: test_i32_zero_undef:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    rbit r0, r0
@@ -486,6 +552,21 @@ define i32 @test_i32_zero_undef(i32 %a) {
 }
 
 define i64 @test_i64_zero_undef(i64 %a) {
+; CHECK-5-LABEL: test_i64_zero_undef:
+; CHECK-5:       @ %bb.0:
+; CHECK-5-NEXT:    sub r3, r1, #1
+; CHECK-5-NEXT:    sub r2, r0, #1
+; CHECK-5-NEXT:    bic r1, r3, r1
+; CHECK-5-NEXT:    bic r2, r2, r0
+; CHECK-5-NEXT:    clz r1, r1
+; CHECK-5-NEXT:    clz r2, r2
+; CHECK-5-NEXT:    rsb r1, r1, #64
+; CHECK-5-NEXT:    cmp r0, #0
+; CHECK-5-NEXT:    rsbne r1, r2, #32
+; CHECK-5-NEXT:    mov r0, r1
+; CHECK-5-NEXT:    mov r1, #0
+; CHECK-5-NEXT:    bx lr
+;
 ; CHECK-LABEL: test_i64_zero_undef:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    rbit r1, r1


        


More information about the llvm-commits mailing list