[llvm] bc27a31 - [AMDGPU] Fix copyPhysReg to not produce unalined vgpr access

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 15 14:14:39 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-03-15T14:14:30-07:00
New Revision: bc27a31801a3ec79e860aba9ac5df77a4d73e330

URL: https://github.com/llvm/llvm-project/commit/bc27a31801a3ec79e860aba9ac5df77a4d73e330
DIFF: https://github.com/llvm/llvm-project/commit/bc27a31801a3ec79e860aba9ac5df77a4d73e330.diff

LOG: [AMDGPU] Fix copyPhysReg to not produce unalined vgpr access

RA can insert something like a sub1_sub2 COPY of a wide VGPR
tuple which results in the unaligned acces with v_pk_mov_b32
after the copy is expanded. This is regression after D97316.

Differential Revision: https://reviews.llvm.org/D98549

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/lib/Target/AMDGPU/SIRegisterInfo.h
    llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4035ff1817c1..4c68247b07a4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -875,8 +875,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-  if (RC->hasSuperClassEq(&AMDGPU::VReg_64RegClass) &&
-      !RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
+  const TargetRegisterClass *SrcRC = RI.getPhysRegClass(SrcReg);
+  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
     if (ST.hasPackedFP32Ops()) {
       BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg)
         .addImm(SISrcMods::OP_SEL_1)
@@ -895,7 +895,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
   if (RI.isSGPRClass(RC)) {
-    if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
+    if (!RI.isSGPRClass(SrcRC)) {
       reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
       return;
     }
@@ -906,12 +906,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   unsigned EltSize = 4;
   unsigned Opcode = AMDGPU::V_MOV_B32_e32;
   if (RI.hasAGPRs(RC)) {
-    Opcode = (RI.hasVGPRs(RI.getPhysRegClass(SrcReg))) ?
+    Opcode = (RI.hasVGPRs(SrcRC)) ?
       AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
-  } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
+  } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) {
     Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
   } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
-             !RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
+             (RI.isProperlyAlignedRC(*RC) &&
+              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
     // TODO: In 96-bit case, could do a 64-bit mov and then a 32-bit mov.
     if (ST.hasPackedFP32Ops()) {
       Opcode = AMDGPU::V_PK_MOV_B32;
@@ -3831,10 +3832,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
       }
 
       // Check that this is the aligned version of the class.
-      if (!RC || ((IsVGPR && !RC->hasSuperClassEq(RI.getVGPRClassForBitWidth(
-                                 RI.getRegSizeInBits(*RC)))) ||
-                  (IsAGPR && !RC->hasSuperClassEq(RI.getAGPRClassForBitWidth(
-                                 RI.getRegSizeInBits(*RC)))))) {
+      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
         ErrInfo = "Subtarget requires even aligned vector registers";
         return false;
       }

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0f44182660a5..c6ecf2a66e9c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2338,6 +2338,18 @@ MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
   return AMDGPU::NoRegister;
 }
 
+bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
+  if (!ST.needsAlignedVGPRs())
+    return true;
+
+  if (hasVGPRs(&RC))
+    return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
+  if (hasAGPRs(&RC))
+    return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
+
+  return true;
+}
+
 bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
   switch (PhysReg) {
   case AMDGPU::SGPR_NULL:

diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index e3910dd9e0fc..5d360677bed6 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -326,6 +326,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
   // \returns \p Reg otherwise.
   MCPhysReg get32BitRegister(MCPhysReg Reg) const;
 
+  // Returns true if a given register class is properly aligned for
+  // the subtarget.
+  bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
+
   /// Return all SGPR128 which satisfy the waves per execution unit requirement
   /// of the subtarget.
   ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;

diff  --git a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
index 462a0127a821..4f32a8d580da 100644
--- a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
+++ b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
@@ -158,3 +158,193 @@ body: |
     ; GFX90A: $vgpr2_vgpr3 = V_PK_MOV_B32 8, $sgpr6_sgpr7, 12, $sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $sgpr4_sgpr5_sgpr6_sgpr7
 ...
+
+---
+name: copy_v64_to_v64_unaligned
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr2_vgpr3
+    ; GFX908-LABEL: name: copy_v64_to_v64_unaligned
+    ; GFX908: liveins: $vgpr2_vgpr3
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
+    ; GFX90A-LABEL: name: copy_v64_to_v64_unaligned
+    ; GFX90A: liveins: $vgpr2_vgpr3
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
+    $vgpr1_vgpr2 = COPY killed $vgpr2_vgpr3, implicit $exec
+...
+
+---
+name: copy_v64_unaligned_to_v64
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr3_vgpr4
+    ; GFX908-LABEL: name: copy_v64_unaligned_to_v64
+    ; GFX908: liveins: $vgpr3_vgpr4
+    ; GFX908: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
+    ; GFX90A-LABEL: name: copy_v64_unaligned_to_v64
+    ; GFX90A: liveins: $vgpr3_vgpr4
+    ; GFX90A: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
+    $vgpr0_vgpr1 = COPY killed $vgpr3_vgpr4, implicit $exec
+...
+
+---
+name: copy_v128_to_v128_unaligned
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX908-LABEL: name: copy_v128_to_v128_unaligned
+    ; GFX908: liveins: $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX908: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX908: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
+    ; GFX90A-LABEL: name: copy_v128_to_v128_unaligned
+    ; GFX90A: liveins: $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX90A: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+    ; GFX90A: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
+    $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
+...
+
+---
+name: copy_v128_unaligned_to_v128
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX908-LABEL: name: copy_v128_unaligned_to_v128
+    ; GFX908: liveins: $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX908: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX908: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
+    ; GFX90A-LABEL: name: copy_v128_unaligned_to_v128
+    ; GFX90A: liveins: $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX90A: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; GFX90A: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
+...
+
+---
+name: copy_s64_to_v64_unaligned
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+    ; GFX908-LABEL: name: copy_s64_to_v64_unaligned
+    ; GFX908: liveins: $sgpr8_sgpr9
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
+    ; GFX90A-LABEL: name: copy_s64_to_v64_unaligned
+    ; GFX90A: liveins: $sgpr8_sgpr9
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
+    $vgpr1_vgpr2 = COPY killed $sgpr8_sgpr9, implicit $exec
+...
+
+---
+name: copy_s128_to_v128_unaligned
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX908-LABEL: name: copy_s128_to_v128_unaligned
+    ; GFX908: liveins: $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX908: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX908: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
+    ; GFX90A-LABEL: name: copy_s128_to_v128_unaligned
+    ; GFX90A: liveins: $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX90A: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+    ; GFX90A: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
+    $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
+...
+
+---
+name: copy_v96_to_v96_unaligned
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr8_vgpr9_vgpr10
+    ; GFX908-LABEL: name: copy_v96_to_v96_unaligned
+    ; GFX908: liveins: $vgpr8_vgpr9_vgpr10
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
+    ; GFX908: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
+    ; GFX90A-LABEL: name: copy_v96_to_v96_unaligned
+    ; GFX90A: liveins: $vgpr8_vgpr9_vgpr10
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
+    ; GFX90A: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
+    $vgpr1_vgpr2_vgpr3 = COPY killed $vgpr8_vgpr9_vgpr10, implicit $exec
+...
+
+---
+name: copy_v96_unaligned_to_v96
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr7_vgpr8_vgpr9
+    ; GFX908-LABEL: name: copy_v96_unaligned_to_v96
+    ; GFX908: liveins: $vgpr7_vgpr8_vgpr9
+    ; GFX908: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
+    ; GFX90A-LABEL: name: copy_v96_unaligned_to_v96
+    ; GFX90A: liveins: $vgpr7_vgpr8_vgpr9
+    ; GFX90A: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
+    $vgpr0_vgpr1_vgpr2 = COPY killed $vgpr7_vgpr8_vgpr9, implicit $exec
+...
+
+---
+name: copy_s96_to_v96
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX908-LABEL: name: copy_s96_to_v96
+    ; GFX908: liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX908: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+    ; GFX90A-LABEL: name: copy_s96_to_v96
+    ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX90A: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+    $vgpr0_vgpr1_vgpr2 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec
+...
+
+---
+name: copy_s96_to_v96_unaligned
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX908-LABEL: name: copy_s96_to_v96_unaligned
+    ; GFX908: liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX908: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX908: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX908: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+    ; GFX90A-LABEL: name: copy_s96_to_v96_unaligned
+    ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2
+    ; GFX90A: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX90A: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+    ; GFX90A: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+    $vgpr1_vgpr2_vgpr3 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec
+...


        


More information about the llvm-commits mailing list