[llvm] c7cff08 - AMDGPU: Fix assert when rewriting saddr d16 loads

Fri May 14 10:24:24 PDT 2021

Author: Matt Arsenault
Date: 2021-05-14T13:24:19-04:00
New Revision: c7cff08f79f775d70f0241f4671d66586a9e17a7

URL: https://github.com/llvm/llvm-project/commit/c7cff08f79f775d70f0241f4671d66586a9e17a7
DIFF: https://github.com/llvm/llvm-project/commit/c7cff08f79f775d70f0241f4671d66586a9e17a7.diff

LOG: AMDGPU: Fix assert when rewriting saddr d16 loads

moveOperands does not handle moving tied operands since it would
generally have to fix up the tied operand references. Avoid the assert
by untying and retying after the modification. These in-place
modifications really aren't manageable.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 8858d90bb709..498c6f021db9 100644

--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5034,8 +5034,24 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
   } else {
     assert(OldSAddrIdx == NewVAddrIdx);
 
-    if (OldVAddrIdx >= 0)
+    if (OldVAddrIdx >= 0) {
+      int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
+                                                 AMDGPU::OpName::vdst_in);
+
+      // RemoveOperand doesn't try to fixup tied operand indexes as it goes, so
+      // it asserts. Untie the operands for now and retie them afterwards.
+      if (NewVDstIn != -1) {
+        int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+        Inst.untieRegOperand(OldVDstIn);
+      }
+
       Inst.RemoveOperand(OldVAddrIdx);
+
+      if (NewVDstIn != -1) {
+        int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
+        Inst.tieOperands(NewVDst, NewVDstIn);
+      }
+    }
   }
 
   if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))

diff  --git a/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll b/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
index 54aa46ebc393..c897d143cd5c 100644
--- a/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
@@ -31,3 +31,27 @@ bb3:                                              ; preds = %bb3, %bb
   %i9 = icmp eq i32 %i8, 256
   br i1 %i9, label %bb2, label %bb3
 }
+
+; GCN-LABEL: {{^}}test_move_load_address_to_vgpr_d16_hi:
+; GCN-NOT: v_readfirstlane_b32
+; GCN: global_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
+define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1)* nocapture %arg) {
+bb:
+  %i1 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 0
+  %load.pre = load volatile i16, i16 addrspace(1)* %i1, align 4
+  %i2 = zext i16 %load.pre to i32
+  br label %bb3
+
+bb2:                                              ; preds = %bb3
+  ret void
+
+bb3:                                              ; preds = %bb3, %bb
+  %i = phi i32 [ %i2, %bb ], [ %i8, %bb3 ]
+  %i4 = zext i32 %i to i64
+  %i5 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %i4
+  %i6 = load volatile i16, i16 addrspace(1)* %i5, align 4
+  %insertelt = insertelement <2 x i16> undef, i16 %i6, i32 1
+  %i8 =  bitcast <2 x i16> %insertelt to i32
+  %i9 = icmp eq i32 %i8, 256
+  br i1 %i9, label %bb2, label %bb3
+}