[llvm-branch-commits] [SPARC] Use lzcnt to implement CTLZ when we have VIS3 (PR #135715)
Sergei Barannikov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Apr 15 18:35:39 PDT 2025
================
@@ -0,0 +1,313 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
+
+; ctlz.i32 called with its trailing i1 argument set to false.
+; For all three RUN configurations the expected code starts with a compare of
+; the input against zero and a branch to a block that returns the constant 32.
+; With -mattr=popc the non-zero path is expected to end in a single popc.
+; With -mattr=vis3 the non-zero path is expected to be an sllx by 32 followed
+; by lzcnt (presumably so the lzcnt result counts only the 32-bit value's own
+; leading zeros -- confirm against the SPARC lowering code).
+define i32 @i32_nopoison(i32 %x) nounwind {
+; V9-LABEL: i32_nopoison:
+; V9: ! %bb.0:
+; V9-NEXT: cmp %o0, 0
+; V9-NEXT: be %icc, .LBB0_2
+; V9-NEXT: nop
+; V9-NEXT: ! %bb.1: ! %cond.false
+; V9-NEXT: srl %o0, 1, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srl %o0, 2, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srl %o0, 4, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srl %o0, 8, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srl %o0, 16, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: xor %o0, -1, %o0
+; V9-NEXT: srl %o0, 1, %o1
+; V9-NEXT: sethi 1398101, %o2
+; V9-NEXT: or %o2, 341, %o2
+; V9-NEXT: and %o1, %o2, %o1
+; V9-NEXT: sub %o0, %o1, %o0
+; V9-NEXT: sethi 838860, %o1
+; V9-NEXT: or %o1, 819, %o1
+; V9-NEXT: and %o0, %o1, %o2
+; V9-NEXT: srl %o0, 2, %o0
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: add %o2, %o0, %o0
+; V9-NEXT: srl %o0, 4, %o1
+; V9-NEXT: add %o0, %o1, %o0
+; V9-NEXT: sethi 246723, %o1
+; V9-NEXT: or %o1, 783, %o1
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: sll %o0, 8, %o1
+; V9-NEXT: add %o0, %o1, %o0
+; V9-NEXT: sll %o0, 16, %o1
+; V9-NEXT: add %o0, %o1, %o0
+; V9-NEXT: retl
+; V9-NEXT: srl %o0, 24, %o0
+; V9-NEXT: .LBB0_2:
+; V9-NEXT: retl
+; V9-NEXT: mov 32, %o0
+;
+; POPC-LABEL: i32_nopoison:
+; POPC: ! %bb.0:
+; POPC-NEXT: cmp %o0, 0
+; POPC-NEXT: be %icc, .LBB0_2
+; POPC-NEXT: nop
+; POPC-NEXT: ! %bb.1: ! %cond.false
+; POPC-NEXT: srl %o0, 1, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srl %o0, 2, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srl %o0, 4, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srl %o0, 8, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srl %o0, 16, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: xor %o0, -1, %o0
+; POPC-NEXT: srl %o0, 0, %o0
+; POPC-NEXT: retl
+; POPC-NEXT: popc %o0, %o0
+; POPC-NEXT: .LBB0_2:
+; POPC-NEXT: retl
+; POPC-NEXT: mov 32, %o0
+;
+; VIS3-LABEL: i32_nopoison:
+; VIS3: ! %bb.0:
+; VIS3-NEXT: cmp %o0, 0
+; VIS3-NEXT: be %icc, .LBB0_2
+; VIS3-NEXT: nop
+; VIS3-NEXT: ! %bb.1: ! %cond.false
+; VIS3-NEXT: sllx %o0, 32, %o0
+; VIS3-NEXT: retl
+; VIS3-NEXT: lzcnt %o0, %o0
+; VIS3-NEXT: .LBB0_2:
+; VIS3-NEXT: retl
+; VIS3-NEXT: mov 32, %o0
+ %ret = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ ret i32 %ret
+}
+
+; ctlz.i32 called with its trailing i1 argument set to true.
+; Unlike i32_nopoison, none of the expected sequences contain a compare/branch
+; on a zero input or a block returning the constant 32; each is a single
+; straight-line block.
+; With -mattr=vis3 the whole body is expected to be sllx by 32 plus lzcnt.
+define i32 @i32_poison(i32 %x) nounwind {
+; V9-LABEL: i32_poison:
+; V9: ! %bb.0:
+; V9-NEXT: srl %o0, 1, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srl %o0, 2, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srl %o0, 4, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srl %o0, 8, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srl %o0, 16, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: xor %o0, -1, %o0
+; V9-NEXT: srl %o0, 1, %o1
+; V9-NEXT: sethi 1398101, %o2
+; V9-NEXT: or %o2, 341, %o2
+; V9-NEXT: and %o1, %o2, %o1
+; V9-NEXT: sub %o0, %o1, %o0
+; V9-NEXT: sethi 838860, %o1
+; V9-NEXT: or %o1, 819, %o1
+; V9-NEXT: and %o0, %o1, %o2
+; V9-NEXT: srl %o0, 2, %o0
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: add %o2, %o0, %o0
+; V9-NEXT: srl %o0, 4, %o1
+; V9-NEXT: add %o0, %o1, %o0
+; V9-NEXT: sethi 246723, %o1
+; V9-NEXT: or %o1, 783, %o1
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: sll %o0, 8, %o1
+; V9-NEXT: add %o0, %o1, %o0
+; V9-NEXT: sll %o0, 16, %o1
+; V9-NEXT: add %o0, %o1, %o0
+; V9-NEXT: retl
+; V9-NEXT: srl %o0, 24, %o0
+;
+; POPC-LABEL: i32_poison:
+; POPC: ! %bb.0:
+; POPC-NEXT: srl %o0, 1, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srl %o0, 2, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srl %o0, 4, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srl %o0, 8, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srl %o0, 16, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: xor %o0, -1, %o0
+; POPC-NEXT: srl %o0, 0, %o0
+; POPC-NEXT: retl
+; POPC-NEXT: popc %o0, %o0
+;
+; VIS3-LABEL: i32_poison:
+; VIS3: ! %bb.0:
+; VIS3-NEXT: sllx %o0, 32, %o0
+; VIS3-NEXT: retl
+; VIS3-NEXT: lzcnt %o0, %o0
+ %ret = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ ret i32 %ret
+}
+
+; ctlz.i64 called with its trailing i1 argument set to false.
+; For all three RUN configurations the expected code starts with a brz on the
+; input that jumps to a block returning the constant 64.
+; With -mattr=popc the non-zero path is expected to end in a single popc.
+; With -mattr=vis3 the non-zero path is expected to be a lone lzcnt, with no
+; shift in front of it (contrast with the i32 tests, which expect sllx first).
+define i64 @i64_nopoison(i64 %x) nounwind {
+; V9-LABEL: i64_nopoison:
+; V9: ! %bb.0:
+; V9-NEXT: brz %o0, .LBB2_2
+; V9-NEXT: nop
+; V9-NEXT: ! %bb.1: ! %cond.false
+; V9-NEXT: srlx %o0, 1, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srlx %o0, 2, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srlx %o0, 4, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srlx %o0, 8, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srlx %o0, 16, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: srlx %o0, 32, %o1
+; V9-NEXT: or %o0, %o1, %o0
+; V9-NEXT: xor %o0, -1, %o0
+; V9-NEXT: srlx %o0, 1, %o1
+; V9-NEXT: sethi 1398101, %o2
+; V9-NEXT: or %o2, 341, %o2
+; V9-NEXT: sllx %o2, 32, %o3
+; V9-NEXT: or %o3, %o2, %o2
+; V9-NEXT: and %o1, %o2, %o1
+; V9-NEXT: sub %o0, %o1, %o0
+; V9-NEXT: sethi 838860, %o1
+; V9-NEXT: or %o1, 819, %o1
+; V9-NEXT: sllx %o1, 32, %o2
+; V9-NEXT: or %o2, %o1, %o1
+; V9-NEXT: and %o0, %o1, %o2
+; V9-NEXT: srlx %o0, 2, %o0
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: add %o2, %o0, %o0
+; V9-NEXT: srlx %o0, 4, %o1
+; V9-NEXT: add %o0, %o1, %o0
+; V9-NEXT: sethi 246723, %o1
+; V9-NEXT: or %o1, 783, %o1
+; V9-NEXT: sllx %o1, 32, %o2
+; V9-NEXT: or %o2, %o1, %o1
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: sethi 16448, %o1
+; V9-NEXT: or %o1, 257, %o1
+; V9-NEXT: sllx %o1, 32, %o2
+; V9-NEXT: or %o2, %o1, %o1
+; V9-NEXT: mulx %o0, %o1, %o0
+; V9-NEXT: retl
+; V9-NEXT: srlx %o0, 56, %o0
+; V9-NEXT: .LBB2_2:
+; V9-NEXT: retl
+; V9-NEXT: mov 64, %o0
+;
+; POPC-LABEL: i64_nopoison:
+; POPC: ! %bb.0:
+; POPC-NEXT: brz %o0, .LBB2_2
+; POPC-NEXT: nop
+; POPC-NEXT: ! %bb.1: ! %cond.false
+; POPC-NEXT: srlx %o0, 1, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srlx %o0, 2, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srlx %o0, 4, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srlx %o0, 8, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srlx %o0, 16, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: srlx %o0, 32, %o1
+; POPC-NEXT: or %o0, %o1, %o0
+; POPC-NEXT: xor %o0, -1, %o0
+; POPC-NEXT: retl
+; POPC-NEXT: popc %o0, %o0
+; POPC-NEXT: .LBB2_2:
+; POPC-NEXT: retl
+; POPC-NEXT: mov 64, %o0
+;
+; VIS3-LABEL: i64_nopoison:
+; VIS3: ! %bb.0:
+; VIS3-NEXT: brz %o0, .LBB2_2
+; VIS3-NEXT: nop
+; VIS3-NEXT: ! %bb.1: ! %cond.false
+; VIS3-NEXT: retl
+; VIS3-NEXT: lzcnt %o0, %o0
+; VIS3-NEXT: .LBB2_2:
+; VIS3-NEXT: retl
+; VIS3-NEXT: mov 64, %o0
+ %ret = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+ ret i64 %ret
+}
+
+define i64 @i64_poison(i64 %x) nounwind {
+; V9-LABEL: i64_poison:
+; V9: ! %bb.0:
+; V9-NEXT: srlx %o0, 1, %o1
----------------
s-barannikov wrote:
This is a very unfortunate expansion. It should be expanded into a call to a runtime library function, but there may be some issues involving interaction with the `CTPOP` and `CTTZ` expansions. Can you add a FIXME for this as well, or just try the `Libcall` action and see what happens?
https://github.com/llvm/llvm-project/pull/135715
More information about the llvm-branch-commits
mailing list