[llvm] r352720 - GlobalISel: Handle odd splits in fewerElementsVector for load/store

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 30 18:46:05 PST 2019


Author: arsenm
Date: Wed Jan 30 18:46:05 2019
New Revision: 352720

URL: http://llvm.org/viewvc/llvm-project?rev=352720&view=rev
Log:
GlobalISel: Handle odd splits in fewerElementsVector for load/store

Modified:
    llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
    llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir

Modified: llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h?rev=352720&r1=352719&r2=352720&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h (original)
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h Wed Jan 30 18:46:05 2019
@@ -126,6 +126,25 @@ private:
   void extractParts(unsigned Reg, LLT Ty, int NumParts,
                     SmallVectorImpl<unsigned> &VRegs);
 
+  /// Version which handles irregular splits.
+  bool extractParts(unsigned Reg, LLT RegTy, LLT MainTy,
+                    LLT &LeftoverTy,
+                    SmallVectorImpl<unsigned> &VRegs,
+                    SmallVectorImpl<unsigned> &LeftoverVRegs);
+
+  /// Helper function to build a wide generic register \p DstReg of type \p
+  /// ResultTy from smaller parts. This will produce a G_MERGE_VALUES,
+  /// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
+  /// for the types.
+  ///
+  /// \p PartRegs must be registers of type \p PartTy.
+  ///
+  /// If \p ResultTy does not evenly break into \p PartTy sized pieces, the
+  /// remainder must be specified with \p LeftoverRegs of type \p LeftoverTy.
+  void insertParts(unsigned DstReg, LLT ResultTy,
+                   LLT PartTy, ArrayRef<unsigned> PartRegs,
+                   LLT LeftoverTy = LLT(), ArrayRef<unsigned> LeftoverRegs = {});
+
   LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
                                                 unsigned TypeIdx, LLT NarrowTy);
 

Modified: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp?rev=352720&r1=352719&r2=352720&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp Wed Jan 30 18:46:05 2019
@@ -86,6 +86,91 @@ void LegalizerHelper::extractParts(unsig
   MIRBuilder.buildUnmerge(VRegs, Reg);
 }
 
+bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
+                                   LLT MainTy, LLT &LeftoverTy,
+                                   SmallVectorImpl<unsigned> &VRegs,
+                                   SmallVectorImpl<unsigned> &LeftoverRegs) {
+  assert(!LeftoverTy.isValid() && "this is an out argument");
+
+  unsigned RegSize = RegTy.getSizeInBits();
+  unsigned MainSize = MainTy.getSizeInBits();
+  unsigned NumParts = RegSize / MainSize;
+  unsigned LeftoverSize = RegSize - NumParts * MainSize;
+
+  // Use an unmerge when possible.
+  if (LeftoverSize == 0) {
+    for (unsigned I = 0; I < NumParts; ++I)
+      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
+    MIRBuilder.buildUnmerge(VRegs, Reg);
+    return true;
+  }
+
+  if (MainTy.isVector()) {
+    unsigned EltSize = MainTy.getScalarSizeInBits();
+    if (LeftoverSize % EltSize != 0)
+      return false;
+    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+  } else {
+    LeftoverTy = LLT::scalar(LeftoverSize);
+  }
+
+  // For irregular sizes, extract the individual parts.
+  for (unsigned I = 0; I != NumParts; ++I) {
+    unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
+    VRegs.push_back(NewReg);
+    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
+  }
+
+  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
+       Offset += LeftoverSize) {
+    unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
+    LeftoverRegs.push_back(NewReg);
+    MIRBuilder.buildExtract(NewReg, Reg, Offset);
+  }
+
+  return true;
+}
+
+void LegalizerHelper::insertParts(unsigned DstReg,
+                                  LLT ResultTy, LLT PartTy,
+                                  ArrayRef<unsigned> PartRegs,
+                                  LLT LeftoverTy,
+                                  ArrayRef<unsigned> LeftoverRegs) {
+  if (!LeftoverTy.isValid()) {
+    assert(LeftoverRegs.empty());
+
+    if (PartTy.isVector())
+      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
+    else
+      MIRBuilder.buildBuildVector(DstReg, PartRegs);
+    return;
+  }
+
+  unsigned PartSize = PartTy.getSizeInBits();
+  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
+
+  unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
+  MIRBuilder.buildUndef(CurResultReg);
+
+  unsigned Offset = 0;
+  for (unsigned PartReg : PartRegs) {
+    unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
+    MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
+    CurResultReg = NewResultReg;
+    Offset += PartSize;
+  }
+
+  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
+    // Use the original output register for the final insert to avoid a copy.
+    unsigned NewResultReg = (I + 1 == E) ?
+      DstReg : MRI.createGenericVirtualRegister(ResultTy);
+
+    MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
+    CurResultReg = NewResultReg;
+    Offset += LeftoverPartSize;
+  }
+}
+
 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   switch (Opcode) {
   case TargetOpcode::G_SDIV:
@@ -1810,6 +1895,36 @@ LegalizerHelper::fewerElementsVectorSele
   return Legalized;
 }
 
+/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
+///
+/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
+/// with any leftover piece as type \p LeftoverTy
+///
+/// Returns -1 if the breakdown is not satisfiable.
+static int getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
+  assert(!LeftoverTy.isValid() && "this is an out argument");
+
+  unsigned Size = OrigTy.getSizeInBits();
+  unsigned NarrowSize = NarrowTy.getSizeInBits();
+  unsigned NumParts = Size / NarrowSize;
+  unsigned LeftoverSize = Size - NumParts * NarrowSize;
+  assert(Size > NarrowSize);
+
+  if (LeftoverSize == 0)
+    return NumParts;
+
+  if (NarrowTy.isVector()) {
+    unsigned EltSize = OrigTy.getScalarSizeInBits();
+    if (LeftoverSize % EltSize != 0)
+      return -1;
+    LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+  } else {
+    LeftoverTy = LLT::scalar(LeftoverSize);
+  }
+
+  return NumParts;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorLoadStore(MachineInstr &MI, unsigned TypeIdx,
                                               LLT NarrowTy) {
@@ -1828,40 +1943,68 @@ LegalizerHelper::fewerElementsVectorLoad
   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
   unsigned ValReg = MI.getOperand(0).getReg();
   unsigned AddrReg = MI.getOperand(1).getReg();
-  unsigned NarrowSize = NarrowTy.getSizeInBits();
-  unsigned Size = MRI.getType(ValReg).getSizeInBits();
-  unsigned NumParts = Size / NarrowSize;
+  LLT ValTy = MRI.getType(ValReg);
 
-  SmallVector<unsigned, 8> NarrowRegs;
-  if (!IsLoad)
-    extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
-
-  const LLT OffsetTy =
-    LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
-  MachineFunction &MF = *MI.getMF();
-
-  for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
-    unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
-    unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
-    unsigned NewAddrReg = 0;
-    MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
-    MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
-      MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
-      NarrowTy.getSizeInBits() / 8, Alignment);
-    if (IsLoad) {
-      unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
-      NarrowRegs.push_back(Dst);
-      MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
-    } else {
-      MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
-    }
+  int NumParts = -1;
+  LLT LeftoverTy;
+  SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
+  if (IsLoad) {
+    NumParts = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+  } else {
+    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
+                     NarrowLeftoverRegs))
+      NumParts = NarrowRegs.size();
   }
+
+  if (NumParts == -1)
+    return UnableToLegalize;
+
+  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+
+  unsigned TotalSize = ValTy.getSizeInBits();
+
+  // Split the load/store into PartTy sized pieces starting at Offset. If this
+  // is a load, return the new registers in ValRegs. For a store, each element
+  // of ValRegs should be PartTy. Returns the next offset that needs to be
+  // handled.
+  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs,
+                             unsigned Offset) -> unsigned {
+    MachineFunction &MF = MIRBuilder.getMF();
+    unsigned PartSize = PartTy.getSizeInBits();
+    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
+         Offset += PartSize, ++Idx) {
+      unsigned ByteSize = PartSize / 8;
+      unsigned ByteOffset = Offset / 8;
+      unsigned NewAddrReg = 0;
+
+      MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+
+      MachineMemOperand *NewMMO =
+        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
+
+      if (IsLoad) {
+        unsigned Dst = MRI.createGenericVirtualRegister(PartTy);
+        ValRegs.push_back(Dst);
+        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
+      } else {
+        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
+      }
+    }
+
+    return Offset;
+  };
+
+  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
+
+  // Handle the rest of the register if this isn't an even type breakdown.
+  if (LeftoverTy.isValid())
+    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
+
   if (IsLoad) {
-    if (NarrowTy.isVector())
-      MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
-    else
-      MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
+    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
+                LeftoverTy, NarrowLeftoverRegs);
   }
+
   MI.eraseFromParent();
   return Legalized;
 }

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=352720&r1=352719&r2=352720&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Wed Jan 30 18:46:05 2019
@@ -248,7 +248,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
       })
     .fewerElementsIf([=, &ST](const LegalityQuery &Query) {
         unsigned MemSize = Query.MMODescrs[0].SizeInBits;
-        return Query.Types[0].isVector() && (MemSize == 96) &&
+        return (MemSize == 96) &&
+               Query.Types[0].isVector() &&
                ST.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS;
       },
       [=](const LegalityQuery &Query) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir?rev=352720&r1=352719&r2=352720&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir Wed Jan 30 18:46:05 2019
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
 
 ---
 name: test_load_global_i32
@@ -7,13 +8,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_i32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-LABEL: name: test_load_global_i32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-LABEL: name: test_load_global_i32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
-
     $vgpr0 = COPY %1
 ...
 
@@ -23,13 +27,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_i64
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: $vgpr0 = COPY [[LOAD]](s32)
+    ; SI-LABEL: name: test_load_global_i64
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](s32)
+    ; VI-LABEL: name: test_load_global_i64
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: $vgpr0 = COPY [[LOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 1)
-
     $vgpr0 = COPY %1
 ...
 
@@ -39,13 +46,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_p1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; SI-LABEL: name: test_load_global_p1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; VI-LABEL: name: test_load_global_p1
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = G_LOAD %0 :: (load 8, addrspace 1)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -55,30 +65,35 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_p4
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; SI-LABEL: name: test_load_global_p4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
+    ; VI-LABEL: name: test_load_global_p4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = G_LOAD %0 :: (load 8, addrspace 1)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
-
 ---
 name: test_load_global_p3
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_p3
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: $vgpr0 = COPY [[LOAD]](p3)
+    ; SI-LABEL: name: test_load_global_p3
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](p3)
+    ; VI-LABEL: name: test_load_global_p3
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: $vgpr0 = COPY [[LOAD]](p3)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = G_LOAD %0 :: (load 4, addrspace 1)
-
     $vgpr0 = COPY %1
 ...
 
@@ -88,13 +103,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_v2s32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; SI-LABEL: name: test_load_global_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; SI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+    ; VI-LABEL: name: test_load_global_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; VI: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = G_LOAD %0 :: (load 8, addrspace 1)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -105,10 +123,14 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_v2s16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; SI-LABEL: name: test_load_global_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; VI-LABEL: name: test_load_global_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: $vgpr0 = COPY [[LOAD]](<2 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = G_LOAD %0 :: (load 4, addrspace 1)
     $vgpr0 = COPY %1
@@ -120,13 +142,22 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_load_global_v3i32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    ; SI-LABEL: name: test_load_global_v3i32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
+    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; VI-LABEL: name: test_load_global_v3i32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)
-
     $vgpr0_vgpr1_vgpr2 = COPY %1
 ...
 
@@ -136,14 +167,18 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_ext_load_global_s64_from_1_align1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-LABEL: name: test_ext_load_global_s64_from_1_align1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-LABEL: name: test_ext_load_global_s64_from_1_align1
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 1, addrspace 1, align 4)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -153,14 +188,18 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_ext_load_global_s64_from_2_align2
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-LABEL: name: test_ext_load_global_s64_from_2_align2
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-LABEL: name: test_ext_load_global_s64_from_2_align2
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 2, addrspace 1, align 4)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -170,14 +209,18 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_ext_load_global_s64_from_4_align4
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; SI-LABEL: name: test_ext_load_global_s64_from_4_align4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    ; VI-LABEL: name: test_ext_load_global_s64_from_4_align4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
-
     $vgpr0_vgpr1 = COPY %1
 ...
 
@@ -187,11 +230,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CHECK-LABEL: name: test_ext_load_global_s128_from_4_align4
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
+    ; SI-LABEL: name: test_ext_load_global_s128_from_4_align4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
+    ; VI-LABEL: name: test_ext_load_global_s128_from_4_align4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -316,3 +364,28 @@ body: |
      $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3
 
 ...
+
+---
+name: test_load_global_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; SI-LABEL: name: test_load_global_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load 8, align 16, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, align 8, addrspace 1)
+    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
+    ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; VI-LABEL: name: test_load_global_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(<3 x s32>) = G_LOAD %0 :: (load 12, addrspace 1, align 16)
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+...

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir?rev=352720&r1=352719&r2=352720&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir Wed Jan 30 18:46:05 2019
@@ -1,16 +1,20 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s
-
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
 ---
 name: test_store_global_i32
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
 
-    ; CHECK-LABEL: name: test_store_global_i32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; SI-LABEL: name: test_store_global_i32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_i32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -22,10 +26,14 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_store_global_i64
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; SI-LABEL: name: test_store_global_i64
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_i64
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -37,10 +45,14 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_store_global_p1
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
-    ; CHECK: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; SI-LABEL: name: test_store_global_p1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; SI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_p1
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; VI: G_STORE [[COPY1]](p1), [[COPY]](p1) :: (store 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p1) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -52,10 +64,14 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_store_global_p4
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
-    ; CHECK: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; SI-LABEL: name: test_store_global_p4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
+    ; SI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_p4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p4) = COPY $vgpr2_vgpr3
+    ; VI: G_STORE [[COPY1]](p4), [[COPY]](p1) :: (store 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p4) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -67,10 +83,14 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
 
-    ; CHECK-LABEL: name: test_store_global_p3
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
-    ; CHECK: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; SI-LABEL: name: test_store_global_p3
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; SI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_p3
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr2
+    ; VI: G_STORE [[COPY1]](p3), [[COPY]](p1) :: (store 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(p3) = COPY $vgpr2
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -82,10 +102,14 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_store_global_v2s32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-    ; CHECK: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; SI-LABEL: name: test_store_global_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1)
+    ; VI-LABEL: name: test_store_global_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI: G_STORE [[COPY1]](<2 x s32>), [[COPY]](p1) :: (store 8, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 8, addrspace 1)
@@ -97,10 +121,14 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
 
-    ; CHECK-LABEL: name: test_store_global_v2s16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
-    ; CHECK: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; SI-LABEL: name: test_store_global_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; SI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; VI: G_STORE [[COPY1]](<2 x s16>), [[COPY]](p1) :: (store 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<2 x s16>) = COPY $vgpr2
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -112,10 +140,19 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
 
-    ; CHECK-LABEL: name: test_store_global_v3s32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; CHECK: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1)
+    ; SI-LABEL: name: test_store_global_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; SI: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
+    ; SI: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store 8, align 4, addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; SI: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; SI: G_STORE [[EXTRACT1]](s32), [[GEP]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    ; VI: G_STORE [[COPY1]](<3 x s32>), [[COPY]](p1) :: (store 12, align 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store 12, align 4, addrspace 1)
@@ -127,11 +164,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s8
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI-LABEL: name: test_truncstore_global_s64_to_s8
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s64_to_s8
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 1, addrspace 1)
@@ -143,11 +185,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI-LABEL: name: test_truncstore_global_s64_to_s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s64_to_s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 1, addrspace 1)
@@ -159,11 +206,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
 
-    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s32
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; SI-LABEL: name: test_truncstore_global_s64_to_s32
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s64_to_s32
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -175,11 +227,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
 
-    ; CHECK-LABEL: name: test_truncstore_global_s128_to_s16
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 2, addrspace 1)
@@ -191,11 +238,16 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
 
-    ; CHECK-LABEL: name: test_truncstore_global_s128_to_s8
-    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
-    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI-LABEL: name: test_truncstore_global_s128_to_s16
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; SI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_truncstore_global_s128_to_s16
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; VI: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store 1, addrspace 1)




More information about the llvm-commits mailing list