[llvm] [AMDGPU] add s_bitset[10]_b32 optimization for shl+[or, andn2] pattern (PR #134155)

Mon Apr 14 00:32:36 PDT 2025

================
@@ -594,6 +595,64 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
   return false;
 }
 
+//  case 1:
+//  From:
+//  s_lshl_b32 s1, 1, s1
+//  s_or_b32 s0, s0, s1
+//  To:
+//  s_bitset1_b32 s0, s1
+//
+//  case 2:
+//  From:
+//  s_lshl_b32 s1, 1, s1
+//  s_andn2_b32 s0, s0, s1
+//  To:
+//  s_bitset0_b32 s0, s1
+bool SIShrinkInstructions::shrinkToBitset(MachineInstr &MI) const {
+  unsigned Opc = MI.getOpcode();
+  const MachineOperand *Dest = &MI.getOperand(0);
+  MachineOperand *Src0 = &MI.getOperand(1);
+  MachineOperand *Src1 = &MI.getOperand(2);
+
+  if (Src0->isReg() && Src1->isReg() && Dest->getReg() == Src0->getReg()) {
----------------
jmmartinez wrote:

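Please invert this condition and return early instead of nesting the rest of the function inside the `if`; see [Use Early Exits and continue to Simplify Code](https://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code).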

https://github.com/llvm/llvm-project/pull/134155