[llvm-branch-commits] [SPARC] Use lzcnt to implement CTLZ when we have VIS3 (PR #135715)

Sergei Barannikov via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Apr 14 18:32:02 PDT 2025


================
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
+
+define i32 @f(i32 %x) nounwind {
+; V9-LABEL: f:
+; V9:       ! %bb.0: ! %entry
+; V9-NEXT:    srl %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o1
+; V9-NEXT:    srl %o1, 2, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 4, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 8, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 16, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    xor %o1, -1, %o1
+; V9-NEXT:    srl %o1, 1, %o2
+; V9-NEXT:    sethi 1398101, %o3
+; V9-NEXT:    or %o3, 341, %o3
+; V9-NEXT:    and %o2, %o3, %o2
+; V9-NEXT:    sub %o1, %o2, %o1
+; V9-NEXT:    sethi 838860, %o2
+; V9-NEXT:    or %o2, 819, %o2
+; V9-NEXT:    and %o1, %o2, %o3
+; V9-NEXT:    srl %o1, 2, %o1
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    add %o3, %o1, %o1
+; V9-NEXT:    srl %o1, 4, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sethi 246723, %o2
+; V9-NEXT:    or %o2, 783, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sll %o1, 8, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sll %o1, 16, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    srl %o1, 24, %o1
+; V9-NEXT:    cmp %o0, 0
+; V9-NEXT:    move %icc, 0, %o1
+; V9-NEXT:    retl
+; V9-NEXT:    mov %o1, %o0
+;
+; POPC-LABEL: f:
+; POPC:       ! %bb.0: ! %entry
+; POPC-NEXT:    srl %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o1
+; POPC-NEXT:    srl %o1, 2, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 4, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 8, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srl %o1, 16, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    xor %o1, -1, %o1
+; POPC-NEXT:    srl %o1, 0, %o1
+; POPC-NEXT:    popc %o1, %o1
+; POPC-NEXT:    cmp %o0, 0
+; POPC-NEXT:    move %icc, 0, %o1
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov %o1, %o0
+;
+; VIS3-LABEL: f:
+; VIS3:       ! %bb.0: ! %entry
+; VIS3-NEXT:    srl %o0, 0, %o1
+; VIS3-NEXT:    lzcnt %o1, %o1
+; VIS3-NEXT:    add %o1, -32, %o1
+; VIS3-NEXT:    cmp %o0, 0
+; VIS3-NEXT:    move %icc, 0, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov %o1, %o0
+entry: ; 32-bit case: leading-zero count of %x, with a zero input mapped to 0
+  %0 = call i32 @llvm.ctlz.i32(i32 %x, i1 true) ; ctlz of %x; the i1 true operand is the is_zero_poison flag (see the LLVM LangRef)
+  %1 = icmp eq i32 %x, 0 ; is the input zero?
+  %2 = select i1 %1, i32 0, i32 %0 ; pick the constant 0 for a zero input, otherwise the ctlz result
+  %3 = trunc i32 %2 to i8 ; keep only the low 8 bits of the selected value
+  %conv = zext i8 %3 to i32 ; widen those 8 bits back to i32 with zero fill
+  ret i32 %conv
+}
+
+define i64 @g(i64 %x) nounwind {
+; V9-LABEL: g:
+; V9:       ! %bb.0: ! %entry
+; V9-NEXT:    srlx %o0, 1, %o1
+; V9-NEXT:    or %o0, %o1, %o1
+; V9-NEXT:    srlx %o1, 2, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 4, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 8, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 16, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 32, %o2
+; V9-NEXT:    or %o1, %o2, %o1
+; V9-NEXT:    xor %o1, -1, %o1
+; V9-NEXT:    srlx %o1, 1, %o2
+; V9-NEXT:    sethi 1398101, %o3
+; V9-NEXT:    or %o3, 341, %o3
+; V9-NEXT:    sllx %o3, 32, %o4
+; V9-NEXT:    or %o4, %o3, %o3
+; V9-NEXT:    and %o2, %o3, %o2
+; V9-NEXT:    sub %o1, %o2, %o1
+; V9-NEXT:    sethi 838860, %o2
+; V9-NEXT:    or %o2, 819, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    and %o1, %o2, %o3
+; V9-NEXT:    srlx %o1, 2, %o1
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    add %o3, %o1, %o1
+; V9-NEXT:    srlx %o1, 4, %o2
+; V9-NEXT:    add %o1, %o2, %o1
+; V9-NEXT:    sethi 246723, %o2
+; V9-NEXT:    or %o2, 783, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    and %o1, %o2, %o1
+; V9-NEXT:    sethi 16448, %o2
+; V9-NEXT:    or %o2, 257, %o2
+; V9-NEXT:    sllx %o2, 32, %o3
+; V9-NEXT:    or %o3, %o2, %o2
+; V9-NEXT:    mulx %o1, %o2, %o1
+; V9-NEXT:    srlx %o1, 56, %o1
+; V9-NEXT:    movrz %o0, 0, %o1
+; V9-NEXT:    retl
+; V9-NEXT:    mov %o1, %o0
+;
+; POPC-LABEL: g:
+; POPC:       ! %bb.0: ! %entry
+; POPC-NEXT:    srlx %o0, 1, %o1
+; POPC-NEXT:    or %o0, %o1, %o1
+; POPC-NEXT:    srlx %o1, 2, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 4, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 8, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 16, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    srlx %o1, 32, %o2
+; POPC-NEXT:    or %o1, %o2, %o1
+; POPC-NEXT:    xor %o1, -1, %o1
+; POPC-NEXT:    popc %o1, %o1
+; POPC-NEXT:    movrz %o0, 0, %o1
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov %o1, %o0
+;
+; VIS3-LABEL: g:
+; VIS3:       ! %bb.0: ! %entry
+; VIS3-NEXT:    lzcnt %o0, %o1
+; VIS3-NEXT:    movrz %o0, 0, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov %o1, %o0
+entry: ; 64-bit case: leading-zero count of %x, with a zero input mapped to 0
+  %0 = call i64 @llvm.ctlz.i64(i64 %x, i1 true) ; ctlz of %x; the i1 true operand is the is_zero_poison flag (see the LLVM LangRef)
+  %1 = icmp eq i64 %x, 0 ; is the input zero?
+  %2 = select i1 %1, i64 0, i64 %0 ; pick the constant 0 for a zero input, otherwise the ctlz result
+  %3 = trunc i64 %2 to i32 ; keep only the low 32 bits of the selected value
+  %conv = zext i32 %3 to i64 ; widen those 32 bits back to i64 with zero fill
+  ret i64 %conv
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare i32 @llvm.ctlz.i32(i32, i1 immarg) #0
+declare i64 @llvm.ctlz.i64(i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
----------------
s-barannikov wrote:

Also looks irrelevant.

https://github.com/llvm/llvm-project/pull/135715


More information about the llvm-branch-commits mailing list