[llvm-branch-commits] [SPARC] Use lzcnt to implement CTLZ when we have VIS3 (PR #135715)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Apr 14 17:46:38 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-sparc

Author: Koakuma (koachan)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/135715.diff


3 Files Affected:

- (modified) llvm/lib/Target/Sparc/SparcISelLowering.cpp (+3-2) 
- (modified) llvm/lib/Target/Sparc/SparcInstrVIS.td (+6) 
- (added) llvm/test/CodeGen/SPARC/ctlz.ll (+171) 


``````````diff
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 0ad261135651f..c34a55bb2881b 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1753,7 +1753,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::CTPOP, MVT::i64,
                        Subtarget->usePopc() ? Legal : Expand);
     setOperationAction(ISD::CTTZ , MVT::i64, Expand);
-    setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+    setOperationAction(ISD::CTLZ, MVT::i64,
+                       Subtarget->isVIS3() ? Legal : Expand);
     setOperationAction(ISD::BSWAP, MVT::i64, Expand);
     setOperationAction(ISD::ROTL , MVT::i64, Expand);
     setOperationAction(ISD::ROTR , MVT::i64, Expand);
@@ -1815,7 +1816,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FREM , MVT::f32, Expand);
   setOperationAction(ISD::FMA  , MVT::f32, Expand);
   setOperationAction(ISD::CTTZ , MVT::i32, Expand);
-  setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ, MVT::i32, Subtarget->isVIS3() ? Legal : Expand);
   setOperationAction(ISD::ROTL , MVT::i32, Expand);
   setOperationAction(ISD::ROTR , MVT::i32, Expand);
   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td
index 925bcdc9070fa..241d6bc11e963 100644
--- a/llvm/lib/Target/Sparc/SparcInstrVIS.td
+++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -303,4 +303,10 @@ def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)),
       (SUBrr (UMULXHI $lhs, $rhs),
              (ADDrr (ANDrr (SRAXri $lhs, 63), $rhs),
                     (ANDrr (SRAXri $rhs, 63), $lhs)))>;
+
+def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
+// 32-bit LZCNT.
+// LZCNT counts across the whole 64-bit register, so zero-extending the
+// 32-bit input (SRL by 0) adds 32 leading zeros; add -32 to cancel them.
+def : Pat<(i32 (ctlz i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
 } // Predicates = [HasVIS3]
diff --git a/llvm/test/CodeGen/SPARC/ctlz.ll b/llvm/test/CodeGen/SPARC/ctlz.ll
new file mode 100644
index 0000000000000..3b2fc0dbfd4a3
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/ctlz.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
+
+define i32 @f(i32 %x) nounwind {
+; V9-LABEL: f:
+; V9:       ! %bb.0: ! %entry
+; V9-NEXT:    srl %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o1
+; V9-NEXT:    srl %o1, 2, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 4, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 8, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 16, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    xor %o1, -1, %o1
+; V9-NEXT:    srl %o1, 1, %o2
+; V9-NEXT:    sethi 1398101, %o3
+; V9-NEXT:    or %o3, 341, %o3
+; V9-NEXT:    and %o2, %o3, %o2
+; V9-NEXT:    sub %o1, %o2, %o1
+; V9-NEXT:    sethi 838860, %o2
+; V9-NEXT:    or %o2, 819, %o2
+; V9-NEXT:    and %o1, %o2, %o3
+; V9-NEXT:    srl %o1, 2, %o1
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    add %o3, %o1, %o1
+; V9-NEXT:    srl %o1, 4, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sethi 246723, %o2
+; V9-NEXT:    or %o2, 783, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sll %o1, 8, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sll %o1, 16, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 24, %o1
+; V9-NEXT:    cmp %o0, 0
+; V9-NEXT:    move %icc, 0, %o1
+; V9-NEXT:    retl
+; V9-NEXT:    mov %o1, %o0
+;
+; POPC-LABEL: f:
+; POPC:       ! %bb.0: ! %entry
+; POPC-NEXT:    srl %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o1
+; POPC-NEXT:    srl %o1, 2, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 4, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 8, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 16, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    xor %o1, -1, %o1
+; POPC-NEXT:    srl %o1, 0, %o1
+; POPC-NEXT:    popc %o1, %o1
+; POPC-NEXT:    cmp %o0, 0
+; POPC-NEXT:    move %icc, 0, %o1
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov %o1, %o0
+;
+; VIS3-LABEL: f:
+; VIS3:       ! %bb.0: ! %entry
+; VIS3-NEXT:    srl %o0, 0, %o1
+; VIS3-NEXT:    lzcnt %o1, %o1
+; VIS3-NEXT:    add %o1, -32, %o1
+; VIS3-NEXT:    cmp %o0, 0
+; VIS3-NEXT:    move %icc, 0, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov %o1, %o0
+entry:
+  %0 = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %1 = icmp eq i32 %x, 0
+  %2 = select i1 %1, i32 0, i32 %0
+  %3 = trunc i32 %2 to i8
+  %conv = zext i8 %3 to i32
+  ret i32 %conv
+}
+
+define i64 @g(i64 %x) nounwind {
+; V9-LABEL: g:
+; V9:       ! %bb.0: ! %entry
+; V9-NEXT:    srlx %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o1
+; V9-NEXT:    srlx %o1, 2, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 4, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 8, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 16, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 32, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    xor %o1, -1, %o1
+; V9-NEXT:    srlx %o1, 1, %o2
+; V9-NEXT:    sethi 1398101, %o3
+; V9-NEXT:    or %o3, 341, %o3
+; V9-NEXT:    sllx %o3, 32, %o4
+; V9-NEXT:    or %o4, %o3, %o3
+; V9-NEXT:    and %o2, %o3, %o2
+; V9-NEXT:    sub %o1, %o2, %o1
+; V9-NEXT:    sethi 838860, %o2
+; V9-NEXT:    or %o2, 819, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    and %o1, %o2, %o3
+; V9-NEXT:    srlx %o1, 2, %o1
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    add %o3, %o1, %o1
+; V9-NEXT:    srlx %o1, 4, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sethi 246723, %o2
+; V9-NEXT:    or %o2, 783, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sethi 16448, %o2
+; V9-NEXT:    or %o2, 257, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    mulx %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 56, %o1
+; V9-NEXT:    movrz %o0, 0, %o1
+; V9-NEXT:    retl
+; V9-NEXT:    mov %o1, %o0
+;
+; POPC-LABEL: g:
+; POPC:       ! %bb.0: ! %entry
+; POPC-NEXT:    srlx %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o1
+; POPC-NEXT:    srlx %o1, 2, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 4, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 8, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 16, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 32, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    xor %o1, -1, %o1
+; POPC-NEXT:    popc %o1, %o1
+; POPC-NEXT:    movrz %o0, 0, %o1
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov %o1, %o0
+;
+; VIS3-LABEL: g:
+; VIS3:       ! %bb.0: ! %entry
+; VIS3-NEXT:    lzcnt %o0, %o1
+; VIS3-NEXT:    movrz %o0, 0, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov %o1, %o0
+entry:
+  %0 = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %1 = icmp eq i64 %x, 0
+  %2 = select i1 %1, i64 0, i64 %0
+  %3 = trunc i64 %2 to i32
+  %conv = zext i32 %3 to i64
+  ret i64 %conv
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.ctlz.i32(i32, i1 immarg) #0
+declare i64 @llvm.ctlz.i64(i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }

``````````

</details>


https://github.com/llvm/llvm-project/pull/135715


More information about the llvm-branch-commits mailing list