[llvm] r352523 - GlobalISel: Fix narrowScalar for load/store with different mem size

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 29 10:13:02 PST 2019


Author: arsenm
Date: Tue Jan 29 10:13:02 2019
New Revision: 352523

URL: http://llvm.org/viewvc/llvm-project?rev=352523&view=rev
Log:
GlobalISel: Fix narrowScalar for load/store with different mem size

This was ignoring the memory size, and producing multiple loads/stores
if the operand size was different from the memory size.

I assume this is the intent of not having an explicit G_ANYEXTLOAD
(although I think that would probably be better).

Modified:
    llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir

Modified: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp?rev=352523&r1=352522&r2=352523&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp Tue Jan 29 10:13:02 2019
@@ -537,6 +537,18 @@ LegalizerHelper::LegalizeResult Legalize
       return UnableToLegalize;
 
     const auto &MMO = **MI.memoperands_begin();
+    unsigned DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+
+    if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
+      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+      auto &MMO = **MI.memoperands_begin();
+      MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
+      MIRBuilder.buildAnyExt(DstReg, TmpReg);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
     // This implementation doesn't work for atomics. Give up instead of doing
     // something invalid.
     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
@@ -566,8 +578,8 @@ LegalizerHelper::LegalizeResult Legalize
 
       DstRegs.push_back(DstReg);
     }
-    unsigned DstReg = MI.getOperand(0).getReg();
-    if(MRI.getType(DstReg).isVector())
+
+    if (DstTy.isVector())
       MIRBuilder.buildBuildVector(DstReg, DstRegs);
     else
       MIRBuilder.buildMerge(DstReg, DstRegs);
@@ -608,6 +620,19 @@ LegalizerHelper::LegalizeResult Legalize
       return UnableToLegalize;
 
     const auto &MMO = **MI.memoperands_begin();
+
+    unsigned SrcReg = MI.getOperand(0).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+
+    if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
+      unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+      auto &MMO = **MI.memoperands_begin();
+      MIRBuilder.buildTrunc(TmpReg, SrcReg);
+      MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
     // This implementation doesn't work for atomics. Give up instead of doing
     // something invalid.
     if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=352523&r1=352522&r2=352523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Tue Jan 29 10:13:02 2019
@@ -229,12 +229,31 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
     });
 
   getActionDefinitionsBuilder({G_LOAD, G_STORE})
+    .narrowScalarIf([](const LegalityQuery &Query) {
+        unsigned Size = Query.Types[0].getSizeInBits();
+        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        return (Size > 32 && MemSize < Size);
+      },
+      [](const LegalityQuery &Query) {
+        return std::make_pair(0, LLT::scalar(32));
+      })
     .legalIf([=, &ST](const LegalityQuery &Query) {
         const LLT &Ty0 = Query.Types[0];
 
+        unsigned Size = Ty0.getSizeInBits();
+        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        if (Size > 32 && MemSize < Size)
+          return false;
+
+        if (Ty0.isVector() && Size != MemSize)
+          return false;
+
         // TODO: Decompose private loads into 4-byte components.
         // TODO: Illegal flat loads on SI
-        switch (Ty0.getSizeInBits()) {
+        switch (MemSize) {
+        case 8:
+        case 16:
+          return Size == 32;
         case 32:
         case 64:
         case 128:
@@ -250,7 +269,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
         default:
           return false;
         }
-      });
+      })
+    .clampScalar(0, S32, S64);
 
 
   auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir?rev=352523&r1=352522&r2=352523&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir Tue Jan 29 10:13:02 2019
@@ -129,3 +129,71 @@ body: |
 
     $vgpr0_vgpr1_vgpr2 = COPY %1
 ...
+
+---
+name: test_ext_load_global_s64_from_1_align1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s64_from_1_align1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_LOAD %0 :: (load 1, addrspace 1, align 4)
+
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_ext_load_global_s64_from_2_align2
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s64_from_2_align2
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_LOAD %0 :: (load 2, addrspace 1, align 4)
+
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_ext_load_global_s64_from_4_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s64_from_4_align4
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
+
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_ext_load_global_s128_from_4_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s128_from_4_align4
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir?rev=352523&r1=352522&r2=352523&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir Tue Jan 29 10:13:02 2019
@@ -120,3 +120,67 @@ body: |
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store 12, align 4, addrspace 1)
 ...
+
+---
+name: test_truncstore_global_s64_to_s8
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 1, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s64_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 1, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s64_to_s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 4, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s128_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: test_truncstore_global_s128_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 1, addrspace 1)
+...




More information about the llvm-commits mailing list