[clang] [llvm] [AMDGPU] Match bitsin(typeof(x)) - popcnt(x) to s_bcnt0_i32 (PR #164847)

Patrick Simmons via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 5 12:14:32 PST 2025


================
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
+
+define amdgpu_ps i32 @bcnt032_not_for_vregs(i64 %val) {
+; CHECK-LABEL: bcnt032_not_for_vregs:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_bcnt_u32_b32 v0, v0, 0
+; CHECK-NEXT:    v_sub_u32_e32 v0, 32, v0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %val0 = trunc i64 %val to i32
+  %result = call i32 @llvm.ctpop.i32(i32 %val0)
+  %result2 = sub i32 32, %result
+  call void asm "; use $0", "s"(i32 %result2)
+  %cmp = icmp ne i32 %result2, 0
+  %zext = zext i1 %cmp to i32
+  ret i32 %zext
+}
+
+define amdgpu_ps i32 @bcnt064_not_for_vregs(i64 %val0) {
+; CHECK-LABEL: bcnt064_not_for_vregs:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_bcnt_u32_b32 v0, v0, 0
+; CHECK-NEXT:    v_bcnt_u32_b32 v0, v1, v0
+; CHECK-NEXT:    v_sub_co_u32_e32 v0, vcc, 64, v0
+; CHECK-NEXT:    v_subb_co_u32_e64 v1, s[0:1], 0, 0, vcc
+; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[0:1]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %result = call i64 @llvm.ctpop.i64(i64 %val0)
+  %result2 = sub i64 64, %result
+  call void asm "; use $0", "s"(i64 %result2)
----------------
linuxrocks123 wrote:

Done

https://github.com/llvm/llvm-project/pull/164847


More information about the llvm-commits mailing list