[llvm-branch-commits] [SPARC] Use lzcnt to implement CTLZ when we have VIS3 (PR #135715)

Sergei Barannikov via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Apr 15 18:35:39 PDT 2025


================
@@ -0,0 +1,313 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
+
+define i32 @i32_nopoison(i32 %x) nounwind {
+; V9-LABEL: i32_nopoison:
+; V9:       ! %bb.0:
+; V9-NEXT:    cmp %o0, 0
+; V9-NEXT:    be %icc, .LBB0_2
+; V9-NEXT:    nop
+; V9-NEXT:  ! %bb.1: ! %cond.false
+; V9-NEXT:    srl %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 2, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 4, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 8, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 16, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    xor %o0, -1, %o0
+; V9-NEXT:    srl %o0, 1, %o1
+; V9-NEXT:    sethi 1398101, %o2
+; V9-NEXT:    or %o2, 341, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sub %o0, %o1, %o0
+; V9-NEXT:    sethi 838860, %o1
+; V9-NEXT:    or %o1, 819, %o1
+; V9-NEXT:    and %o0, %o1, %o2
+; V9-NEXT:    srl %o0, 2, %o0
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    add %o2, %o0, %o0
+; V9-NEXT:    srl %o0, 4, %o1
+; V9-NEXT:    add %o0, %o1, %o0
+; V9-NEXT:    sethi 246723, %o1
+; V9-NEXT:    or %o1, 783, %o1
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    sll %o0, 8, %o1
+; V9-NEXT:    add %o0, %o1, %o0
+; V9-NEXT:    sll %o0, 16, %o1
+; V9-NEXT:    add %o0, %o1, %o0
+; V9-NEXT:    retl
+; V9-NEXT:    srl %o0, 24, %o0
+; V9-NEXT:  .LBB0_2:
+; V9-NEXT:    retl
+; V9-NEXT:    mov 32, %o0
+;
+; POPC-LABEL: i32_nopoison:
+; POPC:       ! %bb.0:
+; POPC-NEXT:    cmp %o0, 0
+; POPC-NEXT:    be %icc, .LBB0_2
+; POPC-NEXT:    nop
+; POPC-NEXT:  ! %bb.1: ! %cond.false
+; POPC-NEXT:    srl %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srl %o0, 2, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srl %o0, 4, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srl %o0, 8, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srl %o0, 16, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    xor %o0, -1, %o0
+; POPC-NEXT:    srl %o0, 0, %o0
+; POPC-NEXT:    retl
+; POPC-NEXT:    popc %o0, %o0
+; POPC-NEXT:  .LBB0_2:
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov 32, %o0
+;
+; VIS3-LABEL: i32_nopoison:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    cmp %o0, 0
+; VIS3-NEXT:    be %icc, .LBB0_2
+; VIS3-NEXT:    nop
+; VIS3-NEXT:  ! %bb.1: ! %cond.false
+; VIS3-NEXT:    sllx %o0, 32, %o0
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    lzcnt %o0, %o0
+; VIS3-NEXT:  .LBB0_2:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov 32, %o0
+  %ret = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  ret i32 %ret
+}
+
+define i32 @i32_poison(i32 %x) nounwind {
+; V9-LABEL: i32_poison:
+; V9:       ! %bb.0:
+; V9-NEXT:    srl %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 2, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 4, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 8, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 16, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    xor %o0, -1, %o0
+; V9-NEXT:    srl %o0, 1, %o1
+; V9-NEXT:    sethi 1398101, %o2
+; V9-NEXT:    or %o2, 341, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sub %o0, %o1, %o0
+; V9-NEXT:    sethi 838860, %o1
+; V9-NEXT:    or %o1, 819, %o1
+; V9-NEXT:    and %o0, %o1, %o2
+; V9-NEXT:    srl %o0, 2, %o0
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    add %o2, %o0, %o0
+; V9-NEXT:    srl %o0, 4, %o1
+; V9-NEXT:    add %o0, %o1, %o0
+; V9-NEXT:    sethi 246723, %o1
+; V9-NEXT:    or %o1, 783, %o1
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    sll %o0, 8, %o1
+; V9-NEXT:    add %o0, %o1, %o0
+; V9-NEXT:    sll %o0, 16, %o1
+; V9-NEXT:    add %o0, %o1, %o0
+; V9-NEXT:    retl
+; V9-NEXT:    srl %o0, 24, %o0
+;
+; POPC-LABEL: i32_poison:
+; POPC:       ! %bb.0:
+; POPC-NEXT:    srl %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srl %o0, 2, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srl %o0, 4, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srl %o0, 8, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srl %o0, 16, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    xor %o0, -1, %o0
+; POPC-NEXT:    srl %o0, 0, %o0
+; POPC-NEXT:    retl
+; POPC-NEXT:    popc %o0, %o0
+;
+; VIS3-LABEL: i32_poison:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    sllx %o0, 32, %o0
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    lzcnt %o0, %o0
+  %ret = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  ret i32 %ret
+}
+
+define i64 @i64_nopoison(i64 %x) nounwind {
+; V9-LABEL: i64_nopoison:
+; V9:       ! %bb.0:
+; V9-NEXT:    brz %o0, .LBB2_2
+; V9-NEXT:    nop
+; V9-NEXT:  ! %bb.1: ! %cond.false
+; V9-NEXT:    srlx %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srlx %o0, 2, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srlx %o0, 4, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srlx %o0, 8, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srlx %o0, 16, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    srlx %o0, 32, %o1
+; V9-NEXT:    or %o0, %o1, %o0
+; V9-NEXT:    xor %o0, -1, %o0
+; V9-NEXT:    srlx %o0, 1, %o1
+; V9-NEXT:    sethi 1398101, %o2
+; V9-NEXT:    or %o2, 341, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sub %o0, %o1, %o0
+; V9-NEXT:    sethi 838860, %o1
+; V9-NEXT:    or %o1, 819, %o1
+; V9-NEXT:    sllx %o1, 32, %o2
+; V9-NEXT:    or %o2, %o1, %o1
+; V9-NEXT:    and %o0, %o1, %o2
+; V9-NEXT:    srlx %o0, 2, %o0
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    add %o2, %o0, %o0
+; V9-NEXT:    srlx %o0, 4, %o1
+; V9-NEXT:    add %o0, %o1, %o0
+; V9-NEXT:    sethi 246723, %o1
+; V9-NEXT:    or %o1, 783, %o1
+; V9-NEXT:    sllx %o1, 32, %o2
+; V9-NEXT:    or %o2, %o1, %o1
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    sethi 16448, %o1
+; V9-NEXT:    or %o1, 257, %o1
+; V9-NEXT:    sllx %o1, 32, %o2
+; V9-NEXT:    or %o2, %o1, %o1
+; V9-NEXT:    mulx %o0, %o1, %o0
+; V9-NEXT:    retl
+; V9-NEXT:    srlx %o0, 56, %o0
+; V9-NEXT:  .LBB2_2:
+; V9-NEXT:    retl
+; V9-NEXT:    mov 64, %o0
+;
+; POPC-LABEL: i64_nopoison:
+; POPC:       ! %bb.0:
+; POPC-NEXT:    brz %o0, .LBB2_2
+; POPC-NEXT:    nop
+; POPC-NEXT:  ! %bb.1: ! %cond.false
+; POPC-NEXT:    srlx %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srlx %o0, 2, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srlx %o0, 4, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srlx %o0, 8, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srlx %o0, 16, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    srlx %o0, 32, %o1
+; POPC-NEXT:    or %o0, %o1, %o0
+; POPC-NEXT:    xor %o0, -1, %o0
+; POPC-NEXT:    retl
+; POPC-NEXT:    popc %o0, %o0
+; POPC-NEXT:  .LBB2_2:
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov 64, %o0
+;
+; VIS3-LABEL: i64_nopoison:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    brz %o0, .LBB2_2
+; VIS3-NEXT:    nop
+; VIS3-NEXT:  ! %bb.1: ! %cond.false
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    lzcnt %o0, %o0
+; VIS3-NEXT:  .LBB2_2:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov 64, %o0
+  %ret = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+  ret i64 %ret
+}
+
+define i64 @i64_poison(i64 %x) nounwind {
+; V9-LABEL: i64_poison:
+; V9:       ! %bb.0:
+; V9-NEXT:    srlx %o0, 1, %o1
----------------
s-barannikov wrote:

This is a very unfortunate expansion. It should be expanded into a call to a runtime library function, but there may be issues with how that interacts with the `CTPOP` and `CTTZ` expansions. Can you add a FIXME for this as well, or just try the `Libcall` action and see what happens?


https://github.com/llvm/llvm-project/pull/135715


More information about the llvm-branch-commits mailing list