[llvm] [AMDGPU] Added isCommutable attribute to V_ADD_NC_U16 (PR #111789)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 02:19:28 PDT 2024
https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/111789
>From 0f38c4defe03b3fb00b4fade0d37975219389b6d Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Mon, 7 Oct 2024 12:03:22 +0530
Subject: [PATCH 1/3] added isCommutable attribute to V_ADD_NC_U16
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 78ca7a2f258cb3..69a7a77f5ee8eb 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -870,9 +870,11 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
}
-
- defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
- defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
+
+ let isCommutable = 1 in {
+ defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
+ defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
+ } // End isCommutable = 1
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
>From 93a657c7f257726578a1491517c9ac8ef22e20f1 Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Thu, 10 Oct 2024 13:48:56 +0530
Subject: [PATCH 2/3] added swap for imm values and global values
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 48 +++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0d153df5c3977c..4d7daa21ffa033 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2742,6 +2742,50 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
return &MI;
}
+static MachineInstr *swapNonRegOperands(MachineInstr &MI,
+ MachineOperand &NonRegOp1,
+ MachineOperand &NonRegOp2) {
+ if (NonRegOp1.isImm() && NonRegOp2.isImm()){
+ auto TargetFlags = NonRegOp1.getTargetFlags();
+ auto NonRegVal = NonRegOp1.getImm();
+
+ NonRegOp1.setImm(NonRegOp2.getImm());
+ NonRegOp2.setImm(NonRegVal);
+ NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+ NonRegOp2.setTargetFlags(TargetFlags);
+ }
+ // --> Still working on the FrameInfo case.
+ // else if (NonRegOp1.isFI() && NonRegOp2.isFI()){
+ // auto TargetFlags = NonRegOp1.getTargetFlags();
+ // auto FrameIndex = NonRegOp1.getIndex();
+ // NonRegOp1.ChangeToFrameIndex(NonRegOp2.getIndex());
+ // NonRegOp2.ChangeToFrameIndex(FrameIndex);
+ // NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+ // NonRegOp2.setTargetFlags(TargetFlags);
+ // }
+ else if (NonRegOp1.isGlobal() && NonRegOp2.isImm()){
+ auto TargetFlags = NonRegOp1.getTargetFlags();
+ auto GlobalVal = NonRegOp1.getGlobal();
+ auto GlobalOffset = NonRegOp1.getOffset();
+ NonRegOp1.ChangeToImmediate(NonRegOp2.getImm());
+ NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+ NonRegOp2.ChangeToGA(GlobalVal, GlobalOffset, TargetFlags);
+ NonRegOp2.setTargetFlags(TargetFlags);
+ }
+ else if (NonRegOp1.isImm() && NonRegOp2.isGlobal()){
+ auto TargetFlags = NonRegOp2.getTargetFlags();
+ auto GlobalVal = NonRegOp2.getGlobal();
+ auto GlobalOffset = NonRegOp2.getOffset();
+ NonRegOp2.ChangeToImmediate(NonRegOp1.getImm());
+ NonRegOp2.setTargetFlags(NonRegOp1.getTargetFlags());
+ NonRegOp1.ChangeToGA(GlobalVal, GlobalOffset, TargetFlags);
+ NonRegOp1.setTargetFlags(TargetFlags);
+ }
+ else
+ return nullptr;
+ return &MI;
+}
+
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned Src0Idx,
unsigned Src1Idx) const {
@@ -2780,8 +2824,10 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
} else {
+ CommutedMI = swapNonRegOperands(MI, Src1, Src0);
+
// FIXME: Found two non registers to commute. This does happen.
- return nullptr;
+ // return nullptr;
}
if (CommutedMI) {
>From 58cd70f4c66c280b0399b6afe345557af16eb07c Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Thu, 10 Oct 2024 14:41:58 +0530
Subject: [PATCH 3/3] Modified test case, will be reverted back
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 +--
llvm/test/CodeGen/AMDGPU/commute-op-sel.mir | 37 +++++++++++++++++++--
2 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4d7daa21ffa033..af9224baf8d7b2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2754,7 +2754,7 @@ static MachineInstr *swapNonRegOperands(MachineInstr &MI,
NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
NonRegOp2.setTargetFlags(TargetFlags);
}
- // --> Still working on the FrameInfo case.
+ // --> Still working on the FrameInfo case :)
// else if (NonRegOp1.isFI() && NonRegOp2.isFI()){
// auto TargetFlags = NonRegOp1.getTargetFlags();
// auto FrameIndex = NonRegOp1.getIndex();
@@ -2825,9 +2825,6 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
} else {
CommutedMI = swapNonRegOperands(MI, Src1, Src0);
-
- // FIXME: Found two non registers to commute. This does happen.
- // return nullptr;
}
if (CommutedMI) {
diff --git a/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir b/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
index b9397f9d5d4ddc..b86778fe49e5eb 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
+++ b/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
@@ -4,14 +4,47 @@
# GCN: %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
# GCN: %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
# GCN: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
+# ---
+# name: test_machine_cse_op_sel
+# body: |
+# bb.0:
+# %0:vgpr_32 = IMPLICIT_DEF
+# %1:vgpr_32 = IMPLICIT_DEF
+# %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
+# %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
+# DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
+# ...
+
+--- |
+ @bar = internal global i32 10, align 4
+ @foo = internal global i32 10, align 4
+
+ define i32 @test_machine_cse_op_sel() {
+ entry:
+ %0 = load i32, ptr @bar, align 4
+ ret i32 %0
+ }
+...
---
name: test_machine_cse_op_sel
body: |
bb.0:
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
- %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
+
+ ; Case 1:
+ %2:vgpr_32 = V_ADD_NC_U16_e64 0, 1, 0, 2, 0, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = V_ADD_NC_U16_e64 0, 2, 0, 1, 0, 0, implicit $mode, implicit $exec
DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
+
+ ; Case 2:
+ %4:vgpr_32 = V_ADD_NC_U16_e64 0, 1, 0, @bar, 0, 0, implicit $mode, implicit $exec
+ %5:vgpr_32 = V_ADD_NC_U16_e64 0, @bar, 0, 1, 0, 0, implicit $mode, implicit $exec
+ DS_WRITE2_B32_gfx9 undef %6:vgpr_32, %4, %5, 0, 1, 0, implicit $exec
+
+ ; Case 3:
+ ;%7:vgpr_32 = V_ADD_NC_U16_e64 0, @foo, 0, @bar, 0, 0, implicit $mode, implicit $exec
+ ;%8:vgpr_32 = V_ADD_NC_U16_e64 0, @bar, 0, @foo, 0, 0, implicit $mode, implicit $exec
+ ;DS_WRITE2_B32_gfx9 undef %9:vgpr_32, %7, %8, 0, 1, 0, implicit $exec
...
More information about the llvm-commits mailing list