[llvm] [AMDGPU] Added isCommutable attribute to V_ADD_NC_U16 (PR #111789)

via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 20 01:11:02 PDT 2024


https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/111789

>From 1caeb5d78537379b9166bf7635db2b3864a73cb7 Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Mon, 7 Oct 2024 12:03:22 +0530
Subject: [PATCH 1/2] added swap for imm values and global values

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 47 ++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0d153df5c3977c..38970a7a0ef86b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2742,6 +2742,50 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
   return &MI;
 }
 
+static MachineInstr *swapNonRegOperands(MachineInstr &MI,
+                                             MachineOperand &NonRegOp1,
+                                             MachineOperand &NonRegOp2) {
+  if (NonRegOp1.isImm() && NonRegOp2.isImm()){
+    auto TargetFlags = NonRegOp1.getTargetFlags();
+    auto NonRegVal = NonRegOp1.getImm();
+
+    NonRegOp1.setImm(NonRegOp2.getImm());
+    NonRegOp2.setImm(NonRegVal);
+    NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+    NonRegOp2.setTargetFlags(TargetFlags);
+  }
+  // TODO: Handle the case where both operands are frame indexes (FI); draft below.
+  // else if (NonRegOp1.isFI() && NonRegOp2.isFI()){
+  //   auto TargetFlags = NonRegOp1.getTargetFlags();
+  //   auto FrameIndex = NonRegOp1.getIndex();  
+  //   NonRegOp1.ChangeToFrameIndex(NonRegOp2.getIndex());  
+  //   NonRegOp2.ChangeToFrameIndex(FrameIndex);  
+  //   NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+  //   NonRegOp2.setTargetFlags(TargetFlags);
+  // }
+  else if (NonRegOp1.isGlobal() && NonRegOp2.isImm()){
+    auto TargetFlags = NonRegOp1.getTargetFlags();
+    auto GlobalVal = NonRegOp1.getGlobal();  
+    auto GlobalOffset = NonRegOp1.getOffset();  
+    NonRegOp1.ChangeToImmediate(NonRegOp2.getImm());  
+    NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
+    NonRegOp2.ChangeToGA(GlobalVal, GlobalOffset, TargetFlags);  
+    NonRegOp2.setTargetFlags(TargetFlags);
+  }
+  else if (NonRegOp1.isImm() && NonRegOp2.isGlobal()){
+    auto TargetFlags = NonRegOp2.getTargetFlags();
+    auto GlobalVal = NonRegOp2.getGlobal();  
+    auto GlobalOffset = NonRegOp2.getOffset();  
+    NonRegOp2.ChangeToImmediate(NonRegOp1.getImm());  
+    NonRegOp2.setTargetFlags(NonRegOp1.getTargetFlags());
+    NonRegOp1.ChangeToGA(GlobalVal, GlobalOffset, TargetFlags);  
+    NonRegOp1.setTargetFlags(TargetFlags);
+  }
+  else 
+    return nullptr;
+  return &MI;
+}
+
 MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned Src0Idx,
                                                   unsigned Src1Idx) const {
@@ -2780,8 +2824,7 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     if (isOperandLegal(MI, Src1Idx, &Src0))
       CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
   } else {
-    // FIXME: Found two non registers to commute. This does happen.
-    return nullptr;
+      CommutedMI = swapNonRegOperands(MI, Src1, Src0);
   }
 
   if (CommutedMI) {

>From 51d02b43ed7053c707587c682fb5f031effda5d3 Mon Sep 17 00:00:00 2001
From: easyonaadit <aaditya.alokdeshpande at amd.com>
Date: Sun, 20 Oct 2024 13:40:33 +0530
Subject: [PATCH 2/2] marked V_ADD_NC_U16 as commutable

---
 llvm/lib/Target/AMDGPU/VOP3Instructions.td | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 78ca7a2f258cb3..44023e1f772392 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -871,7 +871,9 @@ let SubtargetPredicate = isGFX10Plus in {
     def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
   }
 
-  defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
+  let isCommutable = 1 in {
+    defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
+  }
   defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
 
   def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;



More information about the llvm-commits mailing list