[llvm-branch-commits] [SPARC] Use lzcnt to implement CTLZ when we have VIS3 (PR #135715)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Apr 14 17:46:38 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-sparc
Author: Koakuma (koachan)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/135715.diff
3 Files Affected:
- (modified) llvm/lib/Target/Sparc/SparcISelLowering.cpp (+3-2)
- (modified) llvm/lib/Target/Sparc/SparcInstrVIS.td (+6)
- (added) llvm/test/CodeGen/SPARC/ctlz.ll (+171)
``````````diff
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 0ad261135651f..c34a55bb2881b 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1753,7 +1753,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i64,
Subtarget->usePopc() ? Legal : Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
- setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i64,
+ Subtarget->isVIS3() ? Legal : Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
@@ -1815,7 +1816,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
- setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Subtarget->isVIS3() ? Legal : Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td
index 925bcdc9070fa..241d6bc11e963 100644
--- a/llvm/lib/Target/Sparc/SparcInstrVIS.td
+++ b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -303,4 +303,10 @@ def : Pat<(i64 (mulhs i64:$lhs, i64:$rhs)),
(SUBrr (UMULXHI $lhs, $rhs),
(ADDrr (ANDrr (SRAXri $lhs, 63), $rhs),
(ANDrr (SRAXri $rhs, 63), $lhs)))>;
+
+def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
+// 32-bit LZCNT.
+// The zero extension will leave us with 32 extra leading zeros,
+// so we need to compensate for it.
+def : Pat<(i32 (ctlz i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
} // Predicates = [HasVIS3]
diff --git a/llvm/test/CodeGen/SPARC/ctlz.ll b/llvm/test/CodeGen/SPARC/ctlz.ll
new file mode 100644
index 0000000000000..3b2fc0dbfd4a3
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/ctlz.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
+
+define i32 @f(i32 %x) nounwind {
+; V9-LABEL: f:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: srl %o0, 1, %o1
+; V9-NEXT: or %o0, %o1, %o1
+; V9-NEXT: srl %o1, 2, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: srl %o1, 4, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: srl %o1, 8, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: srl %o1, 16, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: xor %o1, -1, %o1
+; V9-NEXT: srl %o1, 1, %o2
+; V9-NEXT: sethi 1398101, %o3
+; V9-NEXT: or %o3, 341, %o3
+; V9-NEXT: and %o2, %o3, %o2
+; V9-NEXT: sub %o1, %o2, %o1
+; V9-NEXT: sethi 838860, %o2
+; V9-NEXT: or %o2, 819, %o2
+; V9-NEXT: and %o1, %o2, %o3
+; V9-NEXT: srl %o1, 2, %o1
+; V9-NEXT: and %o1, %o2, %o1
+; V9-NEXT: add %o3, %o1, %o1
+; V9-NEXT: srl %o1, 4, %o2
+; V9-NEXT: add %o1, %o2, %o1
+; V9-NEXT: sethi 246723, %o2
+; V9-NEXT: or %o2, 783, %o2
+; V9-NEXT: and %o1, %o2, %o1
+; V9-NEXT: sll %o1, 8, %o2
+; V9-NEXT: add %o1, %o2, %o1
+; V9-NEXT: sll %o1, 16, %o2
+; V9-NEXT: add %o1, %o2, %o1
+; V9-NEXT: srl %o1, 24, %o1
+; V9-NEXT: cmp %o0, 0
+; V9-NEXT: move %icc, 0, %o1
+; V9-NEXT: retl
+; V9-NEXT: mov %o1, %o0
+;
+; POPC-LABEL: f:
+; POPC: ! %bb.0: ! %entry
+; POPC-NEXT: srl %o0, 1, %o1
+; POPC-NEXT: or %o0, %o1, %o1
+; POPC-NEXT: srl %o1, 2, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: srl %o1, 4, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: srl %o1, 8, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: srl %o1, 16, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: xor %o1, -1, %o1
+; POPC-NEXT: srl %o1, 0, %o1
+; POPC-NEXT: popc %o1, %o1
+; POPC-NEXT: cmp %o0, 0
+; POPC-NEXT: move %icc, 0, %o1
+; POPC-NEXT: retl
+; POPC-NEXT: mov %o1, %o0
+;
+; VIS3-LABEL: f:
+; VIS3: ! %bb.0: ! %entry
+; VIS3-NEXT: srl %o0, 0, %o1
+; VIS3-NEXT: lzcnt %o1, %o1
+; VIS3-NEXT: add %o1, -32, %o1
+; VIS3-NEXT: cmp %o0, 0
+; VIS3-NEXT: move %icc, 0, %o1
+; VIS3-NEXT: retl
+; VIS3-NEXT: mov %o1, %o0
+entry:
+ %0 = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %1 = icmp eq i32 %x, 0
+ %2 = select i1 %1, i32 0, i32 %0
+ %3 = trunc i32 %2 to i8
+ %conv = zext i8 %3 to i32
+ ret i32 %conv
+}
+
+define i64 @g(i64 %x) nounwind {
+; V9-LABEL: g:
+; V9: ! %bb.0: ! %entry
+; V9-NEXT: srlx %o0, 1, %o1
+; V9-NEXT: or %o0, %o1, %o1
+; V9-NEXT: srlx %o1, 2, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: srlx %o1, 4, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: srlx %o1, 8, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: srlx %o1, 16, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: srlx %o1, 32, %o2
+; V9-NEXT: or %o1, %o2, %o1
+; V9-NEXT: xor %o1, -1, %o1
+; V9-NEXT: srlx %o1, 1, %o2
+; V9-NEXT: sethi 1398101, %o3
+; V9-NEXT: or %o3, 341, %o3
+; V9-NEXT: sllx %o3, 32, %o4
+; V9-NEXT: or %o4, %o3, %o3
+; V9-NEXT: and %o2, %o3, %o2
+; V9-NEXT: sub %o1, %o2, %o1
+; V9-NEXT: sethi 838860, %o2
+; V9-NEXT: or %o2, 819, %o2
+; V9-NEXT: sllx %o2, 32, %o3
+; V9-NEXT: or %o3, %o2, %o2
+; V9-NEXT: and %o1, %o2, %o3
+; V9-NEXT: srlx %o1, 2, %o1
+; V9-NEXT: and %o1, %o2, %o1
+; V9-NEXT: add %o3, %o1, %o1
+; V9-NEXT: srlx %o1, 4, %o2
+; V9-NEXT: add %o1, %o2, %o1
+; V9-NEXT: sethi 246723, %o2
+; V9-NEXT: or %o2, 783, %o2
+; V9-NEXT: sllx %o2, 32, %o3
+; V9-NEXT: or %o3, %o2, %o2
+; V9-NEXT: and %o1, %o2, %o1
+; V9-NEXT: sethi 16448, %o2
+; V9-NEXT: or %o2, 257, %o2
+; V9-NEXT: sllx %o2, 32, %o3
+; V9-NEXT: or %o3, %o2, %o2
+; V9-NEXT: mulx %o1, %o2, %o1
+; V9-NEXT: srlx %o1, 56, %o1
+; V9-NEXT: movrz %o0, 0, %o1
+; V9-NEXT: retl
+; V9-NEXT: mov %o1, %o0
+;
+; POPC-LABEL: g:
+; POPC: ! %bb.0: ! %entry
+; POPC-NEXT: srlx %o0, 1, %o1
+; POPC-NEXT: or %o0, %o1, %o1
+; POPC-NEXT: srlx %o1, 2, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: srlx %o1, 4, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: srlx %o1, 8, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: srlx %o1, 16, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: srlx %o1, 32, %o2
+; POPC-NEXT: or %o1, %o2, %o1
+; POPC-NEXT: xor %o1, -1, %o1
+; POPC-NEXT: popc %o1, %o1
+; POPC-NEXT: movrz %o0, 0, %o1
+; POPC-NEXT: retl
+; POPC-NEXT: mov %o1, %o0
+;
+; VIS3-LABEL: g:
+; VIS3: ! %bb.0: ! %entry
+; VIS3-NEXT: lzcnt %o0, %o1
+; VIS3-NEXT: movrz %o0, 0, %o1
+; VIS3-NEXT: retl
+; VIS3-NEXT: mov %o1, %o0
+entry:
+ %0 = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ %1 = icmp eq i64 %x, 0
+ %2 = select i1 %1, i64 0, i64 %0
+ %3 = trunc i64 %2 to i32
+ %conv = zext i32 %3 to i64
+ ret i64 %conv
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.ctlz.i32(i32, i1 immarg) #0
+declare i64 @llvm.ctlz.i64(i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
``````````
</details>
https://github.com/llvm/llvm-project/pull/135715
More information about the llvm-branch-commits mailing list