[llvm] [AMDGPU] add s_bitset[10]_b32 optimization for shl+[or, andn2] pattern (PR #134155)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 3 07:53:57 PDT 2025


================
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+define amdgpu_ps i32 @s_bitset1_b32(i32 inreg %src0, i32 inreg %bit.index) {
+; SI-LABEL: s_bitset1_b32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_bitset1_b32 s0, s1
+; SI-NEXT:    ; return to shader part epilog
+  %set.bit.at.index = shl i32 1, %bit.index ; single-bit mask: 1 << %bit.index
+  %or = or i32 %src0, %set.bit.at.index ; set that bit in %src0; the checks above expect a single s_bitset1_b32
+  ret i32 %or
+}
+
+define amdgpu_ps i32 @s_bitset0_b32(i32 inreg %src0, i32 inreg %bit.index) {
+; SI-LABEL: s_bitset0_b32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_bitset0_b32 s0, s1
+; SI-NEXT:    ; return to shader part epilog
+  %set.bit.at.index = shl i32 1, %bit.index ; single-bit mask: 1 << %bit.index
+  %other.bits = xor i32 %set.bit.at.index, -1 ; xor with -1 inverts the mask: every bit set except the indexed one
+  %and = and i32 %src0, %other.bits ; clear that bit in %src0; the checks above expect a single s_bitset0_b32
+  ret i32 %and
+}
----------------
arsenm wrote:

Should also repeat the same pattern with vector types.

Also should add copies of the functions that use VGPR inputs; we don't want to select these instructions in that case.

https://github.com/llvm/llvm-project/pull/134155


More information about the llvm-commits mailing list