[llvm] [AMDGPU] Added isCommutable attribute to V_ADD_NC_U16 (PR #111789)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 10 01:19:12 PDT 2024


https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/111789

>From 0f38c4defe03b3fb00b4fade0d37975219389b6d Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Mon, 7 Oct 2024 12:03:22 +0530
Subject: [PATCH 1/2] added isCommutable attribute to V_ADD_NC_U16

---
 llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 78ca7a2f258cb3..69a7a77f5ee8eb 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -870,9 +870,11 @@ let SubtargetPredicate = isGFX10Plus in {
     def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
     def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
   }
-
-  defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
-  defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
+  
+  let isCommutable = 1 in
+  defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
+  // Subtraction is not commutative, so V_SUB_NC_U16 must stay outside the let.
+  defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
 
   def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
   def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;

>From 93a657c7f257726578a1491517c9ac8ef22e20f1 Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Thu, 10 Oct 2024 13:48:56 +0530
Subject: [PATCH 2/2] added swap for imm values and global values

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 48 +++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0d153df5c3977c..4d7daa21ffa033 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2742,6 +2742,50 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
   return &MI;
 }
 
+/// Swap two non-register source operands of \p MI in place. Handles
+/// immediate/immediate and global-address/immediate pairs (either order);
+/// each operand's target flags move with its value. Frame indexes are not
+/// supported yet.
+/// \returns \p MI on success, or nullptr (operands untouched) for any other
+/// combination of operand kinds.
+static MachineInstr *swapNonRegOperands(MachineInstr &MI,
+                                        MachineOperand &NonRegOp1,
+                                        MachineOperand &NonRegOp2) {
+  if (NonRegOp1.isImm() && NonRegOp2.isImm()) {
+    const unsigned TargetFlags = NonRegOp1.getTargetFlags();
+    const int64_t NonRegVal = NonRegOp1.getImm();
+
+    NonRegOp1.setImm(NonRegOp2.getImm());
+    NonRegOp2.setImm(NonRegVal);
+    NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+    NonRegOp2.setTargetFlags(TargetFlags);
+    return &MI;
+  }
+
+  // Remaining supported case: one global address and one immediate. Pick out
+  // which is which so a single code path serves both operand orders.
+  MachineOperand *GlobalOp = nullptr;
+  MachineOperand *ImmOp = nullptr;
+  if (NonRegOp1.isGlobal() && NonRegOp2.isImm()) {
+    GlobalOp = &NonRegOp1;
+    ImmOp = &NonRegOp2;
+  } else if (NonRegOp1.isImm() && NonRegOp2.isGlobal()) {
+    GlobalOp = &NonRegOp2;
+    ImmOp = &NonRegOp1;
+  } else {
+    return nullptr;
+  }
+
+  // Capture the global's payload before overwriting it; the ChangeTo* calls
+  // install the target flags themselves, so no separate setTargetFlags.
+  const GlobalValue *GV = GlobalOp->getGlobal();
+  const int64_t Offset = GlobalOp->getOffset();
+  const unsigned GlobalFlags = GlobalOp->getTargetFlags();
+  GlobalOp->ChangeToImmediate(ImmOp->getImm(), ImmOp->getTargetFlags());
+  ImmOp->ChangeToGA(GV, Offset, GlobalFlags);
+  return &MI;
+}
+
 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned Src0Idx,
                                                   unsigned Src1Idx) const {
@@ -2780,8 +2824,10 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     if (isOperandLegal(MI, Src1Idx, &Src0))
       CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
   } else {
+      CommutedMI = swapNonRegOperands(MI, Src1, Src0);
+    
     // FIXME: Found two non registers to commute. This does happen.
-    return nullptr;
+    // return nullptr;
   }
 
   if (CommutedMI) {



More information about the llvm-commits mailing list