[llvm] dff3454 - [TwoAddressInstruction] Tweak constraining of tied operands

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 1 12:58:05 PDT 2021


Author: Jay Foad
Date: 2021-10-01T20:57:58+01:00
New Revision: dff3454bda097723799935e8ea7f026ff0626940

URL: https://github.com/llvm/llvm-project/commit/dff3454bda097723799935e8ea7f026ff0626940
DIFF: https://github.com/llvm/llvm-project/commit/dff3454bda097723799935e8ea7f026ff0626940.diff

LOG: [TwoAddressInstruction] Tweak constraining of tied operands

In collectTiedOperands, when handling an undef use that is tied to a
def, constrain the dst reg with the actual register class of the src
reg, instead of with the register class from the instruction's
MCInstrDesc. This makes a difference in some AMDGPU test cases like
this, before:

  %16:sgpr_96 = INSERT_SUBREG undef %15:sgpr_96_with_sub0_sub1(tied-def 0), killed %11:sreg_64_xexec, %subreg.sub0_sub1

After, without this patch:

  undef %16.sub0_sub1:sgpr_96 = COPY killed %11:sreg_64_xexec

This fails machine verification if you force it to run after
TwoAddressInstruction (currently it is disabled) with:

*** Bad machine code: Invalid register class for subregister index ***
- function:    s_load_constant_v3i32_align4
- basic block: %bb.0  (0xa011a88)
- instruction: undef %16.sub0_sub1:sgpr_96 = COPY killed %11:sreg_64_xexec
- operand 0:   undef %16.sub0_sub1:sgpr_96
Register class SGPR_96 does not fully support subreg index 4

After, with this patch:

  undef %16.sub0_sub1:sgpr_96_with_sub0_sub1 = COPY killed %11:sreg_64_xexec

See also svn r159120 which introduced the code to handle tied undef
uses.

Differential Revision: https://reviews.llvm.org/D110944

Added: 
    

Modified: 
    llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
    llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0fa580fb4c13d..94b456895e003 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1335,7 +1335,6 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
 // Return true if any tied operands where found, including the trivial ones.
 bool TwoAddressInstructionPass::
 collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
-  const MCInstrDesc &MCID = MI->getDesc();
   bool AnyOps = false;
   unsigned NumOps = MI->getNumOperands();
 
@@ -1357,10 +1356,10 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
     // Deal with undef uses immediately - simply rewrite the src operand.
     if (SrcMO.isUndef() && !DstMO.getSubReg()) {
       // Constrain the DstReg register class if required.
-      if (DstReg.isVirtual())
-        if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
-                                                             TRI, *MF))
-          MRI->constrainRegClass(DstReg, RC);
+      if (DstReg.isVirtual()) {
+        const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+        MRI->constrainRegClass(DstReg, RC);
+      }
       SrcMO.setReg(DstReg);
       SrcMO.setSubReg(0);
       LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);

diff  --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
index a3b7951004140..ae0c8b57b0645 100644
--- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
+++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll
@@ -1,8 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -stop-after twoaddressinstruction < %s | FileCheck %s
 
-; FIXME: the operand "undef %16.sub0_sub1:sgpr_96" will fail machine
-; verification because sgpr_96 does not fully support sub0_sub1.
+; Check that %16 gets constrained to register class sgpr_96_with_sub0_sub1.
 define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) {
   ; CHECK-LABEL: name: s_load_constant_v3i32_align4
   ; CHECK: bb.0 (%ir-block.0):
@@ -14,9 +13,9 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)*
   ; CHECK-NEXT:   %0.sub1:sreg_64 = COPY killed [[COPY1]]
   ; CHECK-NEXT:   [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (load (<2 x s32>) from %ir.ptr, align 4, addrspace 4)
   ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (load (s32) from %ir.ptr + 8, addrspace 4)
-  ; CHECK-NEXT:   undef %16.sub0_sub1:sgpr_96 = COPY killed [[S_LOAD_DWORDX2_IMM]]
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_96 = COPY killed %16
-  ; CHECK-NEXT:   [[COPY2]].sub2:sgpr_96 = COPY undef [[S_LOAD_DWORD_IMM]]
+  ; CHECK-NEXT:   undef %16.sub0_sub1:sgpr_96_with_sub0_sub1 = COPY killed [[S_LOAD_DWORDX2_IMM]]
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY killed %16
+  ; CHECK-NEXT:   [[COPY2]].sub2:sgpr_96_with_sub0_sub1 = COPY undef [[S_LOAD_DWORD_IMM]]
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0
   ; CHECK-NEXT:   $sgpr0 = COPY killed [[COPY3]]
   ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[COPY2]].sub1


        


More information about the llvm-commits mailing list