[llvm] [AMDGPU] Fold uniform readfirstlane + cndmask (PR #70188)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 25 03:35:28 PDT 2023
================
@@ -1400,6 +1401,88 @@ bool SIFoldOperands::tryFoldFoldableCopy(
return Changed;
}
+// Try to fold the following pattern:
+// s_cselect s[2:3], K, 0 ; K has LSB set. Usually it's +-1.
+// v_cndmask v0, 0, +-1, s[2:3]
+// v_readfirstlane s0, v0
+//
+// into (for example)
+//
+// s_cselect s[2:3], K, 0
+// s_bfe_u64 s0, s[2:3], 0x10000
+bool SIFoldOperands::tryFoldUniformReadFirstLaneCndMask(
+ MachineInstr &MI) const {
+ if (MI.getOpcode() != AMDGPU::V_READFIRSTLANE_B32)
+ return false;
+
+ MachineInstr *RFLSrc = MRI->getVRegDef(MI.getOperand(1).getReg());
+ // We can also have the following pattern:
+ //
+ // %2:vreg_64 = REG_SEQUENCE %X:vgpr_32, sub0, %1:sreg_32, sub1
+ // %3:sgpr_32 = V_READFIRSTLANE_B32 %2.sub0:vreg_64
+ //
+ // In this case we dig into %X or %1 depending on which sub register
+ // the V_READFIRSTLANE accesses.
+ if (RFLSrc->isRegSequence()) {
+ unsigned RFLSubReg = MI.getOperand(1).getSubReg();
+ if (RFLSrc->getNumOperands() != 5)
+ return false;
+
+ if (RFLSrc->getOperand(2).getImm() == RFLSubReg)
+ RFLSrc = MRI->getVRegDef(RFLSrc->getOperand(1).getReg());
+ else if (RFLSrc->getOperand(4).getImm() == RFLSubReg)
+ RFLSrc = MRI->getVRegDef(RFLSrc->getOperand(3).getReg());
+ else
+ return false;
+ }
+
+ // Need e64 to have a SGPR regmask.
+ if (!RFLSrc || RFLSrc->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+ return false;
+
+ MachineOperand *Src0 = TII->getNamedOperand(*RFLSrc, AMDGPU::OpName::src0);
+ MachineOperand *Src1 = TII->getNamedOperand(*RFLSrc, AMDGPU::OpName::src1);
+ Register Src2 = TII->getNamedOperand(*RFLSrc, AMDGPU::OpName::src2)->getReg();
+
+ if (!Src0->isImm() || Src0->getImm() != 0 || !Src1->isImm())
+ return false;
+
+ // This pattern usually comes from an ext; a sext uses -1.
+ bool IsSigned = false;
+ if (Src1->getImm() == -1)
+ IsSigned = true;
+ else if (Src1->getImm() != 1)
+ return false;
----------------
arsenm wrote:
IsSigned = Src1->getImm() == -1
https://github.com/llvm/llvm-project/pull/70188
More information about the llvm-commits
mailing list