[llvm] [AMDGPU] Canonicalize G_ZEXT of the shift amount in RegBankCombiner (PR #131792)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 05:09:40 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
Canonicalize it to a G_AND instead so that ISel patterns can pick it
up and ignore it, as the shift instructions only read low bits.
G_ZEXT would be lowered to a v/s_and anyway in most cases.
I'm also looking at making a DAG version of this in a separate patch.
---
Full diff: https://github.com/llvm/llvm-project/pull/131792.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCombine.td (+17-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp (+30)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-amount-zext.mir (+146)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index a21505356274b..a0eb0ffea8d7e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -134,6 +134,22 @@ def combine_fmul_with_select_to_fldexp : GICombineRule<
[{ return Helper.matchCombineFmulWithSelectToFldexp(*${root}, *${sel}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+// (shift x, (zext amt)) -> (shift x, (and (anyext amt), mask)
+//
+// The pattern is longer, but is better for matching during ISel.
+class canonicalize_zext_shift_amt<Instruction opc> : GICombineRule<
+ (defs root:$dst),
+ (match (G_ZEXT $amt, $amtsrc):$zext,
+ (opc $dst, $src, $amt):$shift),
+ (apply [{ applyCanonicalizeZextShiftAmt(*${shift}, *${zext}); }])>;
+
+def canonicalize_zext_lshr : canonicalize_zext_shift_amt<G_LSHR>;
+def canonicalize_zext_ashr : canonicalize_zext_shift_amt<G_ASHR>;
+def canonicalize_zext_shl : canonicalize_zext_shift_amt<G_SHL>;
+
+def zext_of_shift_amount_combines : GICombineGroup<[
+ canonicalize_zext_lshr, canonicalize_zext_ashr, canonicalize_zext_shl
+]>;
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
@@ -181,5 +197,5 @@ def AMDGPURegBankCombiner : GICombiner<
zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
identity_combines, redundant_and, constant_fold_cast_op,
- cast_of_cast_combines]> {
+ cast_of_cast_combines, zext_of_shift_amount_combines]> {
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 98c48f4fe3705..68bffe5bbb7f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -87,6 +87,8 @@ class AMDGPURegBankCombinerImpl : public Combiner {
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
void applyClamp(MachineInstr &MI, Register &Reg) const;
+ void applyCanonicalizeZextShiftAmt(MachineInstr &MI, MachineInstr &Ext) const;
+
private:
SIModeRegisterDefaults getMode() const;
bool getIEEE() const;
@@ -362,6 +364,34 @@ void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
MI.eraseFromParent();
}
+void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
+ MachineInstr &MI, MachineInstr &Ext) const {
+ unsigned ShOpc = MI.getOpcode();
+ assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||
+ ShOpc == AMDGPU::G_ASHR);
+ assert(Ext.getOpcode() == AMDGPU::G_ZEXT);
+
+ Register AmtReg = Ext.getOperand(1).getReg();
+ Register ShDst = MI.getOperand(0).getReg();
+ Register ShSrc = MI.getOperand(1).getReg();
+
+ LLT ExtAmtTy = MRI.getType(Ext.getOperand(0).getReg());
+ LLT AmtTy = MRI.getType(AmtReg);
+
+ auto &RB = *MRI.getRegBank(AmtReg);
+
+ auto NewExt = B.buildAnyExt(ExtAmtTy, AmtReg);
+ auto Mask = B.buildConstant(
+ ExtAmtTy, maskTrailingOnes<uint64_t>(AmtTy.getScalarSizeInBits()));
+ auto And = B.buildAnd(ExtAmtTy, NewExt, Mask);
+ B.buildInstr(ShOpc, {ShDst}, {ShSrc, And});
+
+ MRI.setRegBank(NewExt.getReg(0), RB);
+ MRI.setRegBank(Mask.getReg(0), RB);
+ MRI.setRegBank(And.getReg(0), RB);
+ MI.eraseFromParent();
+}
+
SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
return MF.getInfo<SIMachineFunctionInfo>()->getMode();
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-amount-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-amount-zext.mir
new file mode 100644
index 0000000000000..77d30f6fa5223
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-amount-zext.mir
@@ -0,0 +1,146 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -run-pass=amdgpu-regbank-combiner %s -o - | FileCheck %s
+
+---
+name: lshr_zext_i16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: lshr_zext_i16
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
+ ; CHECK-NEXT: %res:sgpr(s32) = G_LSHR %src, [[AND]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY %res(s32)
+ %src:sgpr(s32) = COPY $sgpr0
+ %regamt:sgpr(s32) = COPY $sgpr1
+ %amt:sgpr(s16) = G_TRUNC %regamt
+ %zextamt:sgpr(s32) = G_ZEXT %amt
+ %res:sgpr(s32) = G_LSHR %src, %zextamt
+ $sgpr0 = COPY %res
+...
+
+---
+name: ashr_zext_i16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: ashr_zext_i16
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
+ ; CHECK-NEXT: %res:sgpr(s32) = G_ASHR %src, [[AND]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY %res(s32)
+ %src:sgpr(s32) = COPY $sgpr0
+ %regamt:sgpr(s32) = COPY $sgpr1
+ %amt:sgpr(s16) = G_TRUNC %regamt
+ %zextamt:sgpr(s32) = G_ZEXT %amt
+ %res:sgpr(s32) = G_ASHR %src, %zextamt
+ $sgpr0 = COPY %res
+...
+
+---
+name: shl_zext_i16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: shl_zext_i16
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
+ ; CHECK-NEXT: %res:sgpr(s32) = G_SHL %src, [[AND]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY %res(s32)
+ %src:sgpr(s32) = COPY $sgpr0
+ %regamt:sgpr(s32) = COPY $sgpr1
+ %amt:sgpr(s16) = G_TRUNC %regamt
+ %zextamt:sgpr(s32) = G_ZEXT %amt
+ %res:sgpr(s32) = G_SHL %src, %zextamt
+ $sgpr0 = COPY %res
+...
+
+---
+name: lshr_zext_i8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: lshr_zext_i8
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
+ ; CHECK-NEXT: %res:sgpr(s32) = G_LSHR %src, [[AND]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY %res(s32)
+ %src:sgpr(s32) = COPY $sgpr0
+ %regamt:sgpr(s32) = COPY $sgpr1
+ %amt:sgpr(s8) = G_TRUNC %regamt
+ %zextamt:sgpr(s32) = G_ZEXT %amt
+ %res:sgpr(s32) = G_LSHR %src, %zextamt
+ $sgpr0 = COPY %res
+...
+
+---
+name: ashr_zext_i8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: ashr_zext_i8
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
+ ; CHECK-NEXT: %res:sgpr(s32) = G_ASHR %src, [[AND]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY %res(s32)
+ %src:sgpr(s32) = COPY $sgpr0
+ %regamt:sgpr(s32) = COPY $sgpr1
+ %amt:sgpr(s8) = G_TRUNC %regamt
+ %zextamt:sgpr(s32) = G_ZEXT %amt
+ %res:sgpr(s32) = G_ASHR %src, %zextamt
+ $sgpr0 = COPY %res
+...
+
+---
+name: shl_zext_i8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+
+ ; CHECK-LABEL: name: shl_zext_i8
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %src:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: %regamt:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND %regamt, [[C]]
+ ; CHECK-NEXT: %res:sgpr(s32) = G_SHL %src, [[AND]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY %res(s32)
+ %src:sgpr(s32) = COPY $sgpr0
+ %regamt:sgpr(s32) = COPY $sgpr1
+ %amt:sgpr(s8) = G_TRUNC %regamt
+ %zextamt:sgpr(s32) = G_ZEXT %amt
+ %res:sgpr(s32) = G_SHL %src, %zextamt
+ $sgpr0 = COPY %res
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/131792
More information about the llvm-commits
mailing list