[llvm] [AMDGPU] Added isCommutable attribute to V_ADD_NC_U16 (PR #111789)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 20 00:59:58 PDT 2024
https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/111789
>From 0f38c4defe03b3fb00b4fade0d37975219389b6d Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Mon, 7 Oct 2024 12:03:22 +0530
Subject: [PATCH 1/7] added isCommutable attribute to V_ADD_NC_U16
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 78ca7a2f258cb3..69a7a77f5ee8eb 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -870,9 +870,11 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
}
-
- defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
- defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
+
+ let isCommutable = 1 in {
+ defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
+ defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
+ } // End isCommutable = 1
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
>From 93a657c7f257726578a1491517c9ac8ef22e20f1 Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Thu, 10 Oct 2024 13:48:56 +0530
Subject: [PATCH 2/7] added swap for imm values and global values
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 48 +++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0d153df5c3977c..4d7daa21ffa033 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2742,6 +2742,50 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
return &MI;
}
+static MachineInstr *swapNonRegOperands(MachineInstr &MI,
+ MachineOperand &NonRegOp1,
+ MachineOperand &NonRegOp2) {
+ if (NonRegOp1.isImm() && NonRegOp2.isImm()){
+ auto TargetFlags = NonRegOp1.getTargetFlags();
+ auto NonRegVal = NonRegOp1.getImm();
+
+ NonRegOp1.setImm(NonRegOp2.getImm());
+ NonRegOp2.setImm(NonRegVal);
+ NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+ NonRegOp2.setTargetFlags(TargetFlags);
+ }
+ // --> Still working on the FrameInfo case.
+ // else if (NonRegOp1.isFI() && NonRegOp2.isFI()){
+ // auto TargetFlags = NonRegOp1.getTargetFlags();
+ // auto FrameIndex = NonRegOp1.getIndex();
+ // NonRegOp1.ChangeToFrameIndex(NonRegOp2.getIndex());
+ // NonRegOp2.ChangeToFrameIndex(FrameIndex);
+ // NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+ // NonRegOp2.setTargetFlags(TargetFlags);
+ // }
+ else if (NonRegOp1.isGlobal() && NonRegOp2.isImm()){
+ auto TargetFlags = NonRegOp1.getTargetFlags();
+ auto GlobalVal = NonRegOp1.getGlobal();
+ auto GlobalOffset = NonRegOp1.getOffset();
+ NonRegOp1.ChangeToImmediate(NonRegOp2.getImm());
+ NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+ NonRegOp2.ChangeToGA(GlobalVal, GlobalOffset, TargetFlags);
+ NonRegOp2.setTargetFlags(TargetFlags);
+ }
+ else if (NonRegOp1.isImm() && NonRegOp2.isGlobal()){
+ auto TargetFlags = NonRegOp2.getTargetFlags();
+ auto GlobalVal = NonRegOp2.getGlobal();
+ auto GlobalOffset = NonRegOp2.getOffset();
+ NonRegOp2.ChangeToImmediate(NonRegOp1.getImm());
+ NonRegOp2.setTargetFlags(NonRegOp1.getTargetFlags());
+ NonRegOp1.ChangeToGA(GlobalVal, GlobalOffset, TargetFlags);
+ NonRegOp1.setTargetFlags(TargetFlags);
+ }
+ else
+ return nullptr;
+ return &MI;
+}
+
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned Src0Idx,
unsigned Src1Idx) const {
@@ -2780,8 +2824,10 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
} else {
+ CommutedMI = swapNonRegOperands(MI, Src1, Src0);
+
// FIXME: Found two non registers to commute. This does happen.
- return nullptr;
+ // return nullptr;
}
if (CommutedMI) {
>From 58cd70f4c66c280b0399b6afe345557af16eb07c Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Thu, 10 Oct 2024 14:41:58 +0530
Subject: [PATCH 3/7] Modified test case, will be reverted back
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 +--
llvm/test/CodeGen/AMDGPU/commute-op-sel.mir | 37 +++++++++++++++++++--
2 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4d7daa21ffa033..af9224baf8d7b2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2754,7 +2754,7 @@ static MachineInstr *swapNonRegOperands(MachineInstr &MI,
NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
NonRegOp2.setTargetFlags(TargetFlags);
}
- // --> Still working on the FrameInfo case.
+ // --> Still working on the FrameInfo case :)
// else if (NonRegOp1.isFI() && NonRegOp2.isFI()){
// auto TargetFlags = NonRegOp1.getTargetFlags();
// auto FrameIndex = NonRegOp1.getIndex();
@@ -2825,9 +2825,6 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
} else {
CommutedMI = swapNonRegOperands(MI, Src1, Src0);
-
- // FIXME: Found two non registers to commute. This does happen.
- // return nullptr;
}
if (CommutedMI) {
diff --git a/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir b/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
index b9397f9d5d4ddc..b86778fe49e5eb 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
+++ b/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
@@ -4,14 +4,47 @@
# GCN: %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
# GCN: %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
# GCN: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
+# ---
+# name: test_machine_cse_op_sel
+# body: |
+# bb.0:
+# %0:vgpr_32 = IMPLICIT_DEF
+# %1:vgpr_32 = IMPLICIT_DEF
+# %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
+# %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
+# DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
+# ...
+
+--- |
+ @bar = internal global i32 10, align 4
+ @foo = internal global i32 10, align 4
+
+ define i32 @test_machine_cse_op_sel() {
+ entry:
+ %0 = load i32, ptr @bar, align 4
+ ret i32 %0
+ }
+...
---
name: test_machine_cse_op_sel
body: |
bb.0:
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
- %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
- %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
+
+ ; Case 1:
+ %2:vgpr_32 = V_ADD_NC_U16_e64 0, 1, 0, 2, 0, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = V_ADD_NC_U16_e64 0, 2, 0, 1, 0, 0, implicit $mode, implicit $exec
DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
+
+ ; Case 2:
+ %4:vgpr_32 = V_ADD_NC_U16_e64 0, 1, 0, @bar, 0, 0, implicit $mode, implicit $exec
+ %5:vgpr_32 = V_ADD_NC_U16_e64 0, @bar, 0, 1, 0, 0, implicit $mode, implicit $exec
+ DS_WRITE2_B32_gfx9 undef %6:vgpr_32, %4, %5, 0, 1, 0, implicit $exec
+
+ ; Case 3:
+ ;%7:vgpr_32 = V_ADD_NC_U16_e64 0, @foo, 0, @bar, 0, 0, implicit $mode, implicit $exec
+ ;%8:vgpr_32 = V_ADD_NC_U16_e64 0, @bar, 0, @foo, 0, 0, implicit $mode, implicit $exec
+ ;DS_WRITE2_B32_gfx9 undef %9:vgpr_32, %7, %8, 0, 1, 0, implicit $exec
...
>From 34f871f30da85bd657137b52df4258b9e8edb806 Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Mon, 14 Oct 2024 10:52:30 +0530
Subject: [PATCH 4/7] temp commit
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index af9224baf8d7b2..38970a7a0ef86b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2756,7 +2756,7 @@ static MachineInstr *swapNonRegOperands(MachineInstr &MI,
}
// --> Still working on the FrameInfo case :)
// else if (NonRegOp1.isFI() && NonRegOp2.isFI()){
- // auto TargetFlags = NonRegOp1.getTargetFlags();
+ // auto TargetFlags = NonRegOp 1.getTargetFlags();
// auto FrameIndex = NonRegOp1.getIndex();
// NonRegOp1.ChangeToFrameIndex(NonRegOp2.getIndex());
// NonRegOp2.ChangeToFrameIndex(FrameIndex);
>From 5d592ba7302dd961f4d3e24fd710abbc30139d18 Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Sun, 20 Oct 2024 13:26:02 +0530
Subject: [PATCH 5/7] temp commit
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 69a7a77f5ee8eb..806f7703b0bf5f 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -871,10 +871,8 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
}
- let isCommutable = 1 in {
- defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
- defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
- } // End isCommutable = 1
+ defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
+ defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
>From 69a7caacae54fc1b0499515d60d64fa3826231de Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Sun, 20 Oct 2024 13:27:53 +0530
Subject: [PATCH 6/7] temp commit
---
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 806f7703b0bf5f..78ca7a2f258cb3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -870,7 +870,7 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
}
-
+
defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
>From a6e4a13f0cb5a36b73508cbbc9eef67c7dfba411 Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Sun, 20 Oct 2024 13:29:42 +0530
Subject: [PATCH 7/7] temp
---
llvm/test/CodeGen/AMDGPU/commute-op-sel.mir | 40 ++-------------------
1 file changed, 3 insertions(+), 37 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir b/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
index b86778fe49e5eb..8eee879d302949 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
+++ b/llvm/test/CodeGen/AMDGPU/commute-op-sel.mir
@@ -5,46 +5,12 @@
# GCN: %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
# GCN: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
# ---
-# name: test_machine_cse_op_sel
-# body: |
-# bb.0:
-# %0:vgpr_32 = IMPLICIT_DEF
-# %1:vgpr_32 = IMPLICIT_DEF
-# %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
-# %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
-# DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
-# ...
-
---- |
- @bar = internal global i32 10, align 4
- @foo = internal global i32 10, align 4
-
- define i32 @test_machine_cse_op_sel() {
- entry:
- %0 = load i32, ptr @bar, align 4
- ret i32 %0
- }
-...
----
name: test_machine_cse_op_sel
body: |
bb.0:
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
-
- ; Case 1:
- %2:vgpr_32 = V_ADD_NC_U16_e64 0, 1, 0, 2, 0, 0, implicit $mode, implicit $exec
- %3:vgpr_32 = V_ADD_NC_U16_e64 0, 2, 0, 1, 0, 0, implicit $mode, implicit $exec
+ %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
+ %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
-
- ; Case 2:
- %4:vgpr_32 = V_ADD_NC_U16_e64 0, 1, 0, @bar, 0, 0, implicit $mode, implicit $exec
- %5:vgpr_32 = V_ADD_NC_U16_e64 0, @bar, 0, 1, 0, 0, implicit $mode, implicit $exec
- DS_WRITE2_B32_gfx9 undef %6:vgpr_32, %4, %5, 0, 1, 0, implicit $exec
-
- ; Case 3:
- ;%7:vgpr_32 = V_ADD_NC_U16_e64 0, @foo, 0, @bar, 0, 0, implicit $mode, implicit $exec
- ;%8:vgpr_32 = V_ADD_NC_U16_e64 0, @bar, 0, @foo, 0, 0, implicit $mode, implicit $exec
- ;DS_WRITE2_B32_gfx9 undef %9:vgpr_32, %7, %8, 0, 1, 0, implicit $exec
-...
-
+...
\ No newline at end of file
More information about the llvm-commits
mailing list