[llvm] 88bdcbb - GlobalISel: Lift store value widening restriction

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 16 11:25:14 PDT 2020


Author: Matt Arsenault
Date: 2020-09-16T14:25:07-04:00
New Revision: 88bdcbbf1aaef6ac99877cc511bf4b2a85343773

URL: https://github.com/llvm/llvm-project/commit/88bdcbbf1aaef6ac99877cc511bf4b2a85343773
DIFF: https://github.com/llvm/llvm-project/commit/88bdcbbf1aaef6ac99877cc511bf4b2a85343773.diff

LOG: GlobalISel: Lift store value widening restriction

Widening the stored value doesn't change the memory size, so it doesn't
need to worry about non-power-of-2 sizes.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 347fe7b0ee98..a8283e47acdd 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2033,7 +2033,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
       return UnableToLegalize;
 
     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-    if (!isPowerOf2_32(Ty.getSizeInBits()))
+    if (!Ty.isScalar())
       return UnableToLegalize;
 
     Observer.changingInstr(MI);

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index 8b607244eb8e..80bd3e1f6ec8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -44,6 +44,38 @@ body: |
     G_STORE %2, %0 :: (store 1, align 1, addrspace 1)
 ...
 
+---
+name: test_store_global_s7_align1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; SI-LABEL: name: test_store_global_s7_align1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; CI-LABEL: name: test_store_global_s7_align1
+    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI-LABEL: name: test_store_global_s7_align1
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; GFX9-LABEL: name: test_store_global_s7_align1
+    ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s7) = G_TRUNC %1
+    G_STORE %2, %0 :: (store 1, align 1, addrspace 1)
+...
+
 ---
 name: test_store_global_s8_align1
 body: |
@@ -192,6 +224,262 @@ body: |
     G_STORE %2, %0 :: (store 2, align 4, addrspace 1)
 ...
 
+---
+name: test_store_global_s24_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; SI-LABEL: name: test_store_global_s24_align4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1)
+    ; SI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
+    ; CI-LABEL: name: test_store_global_s24_align4
+    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1)
+    ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
+    ; VI-LABEL: name: test_store_global_s24_align4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1)
+    ; VI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
+    ; GFX9-LABEL: name: test_store_global_s24_align4
+    ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 4, addrspace 1)
+    ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s24) = G_TRUNC %1
+    G_STORE %2, %0 :: (store 3, align 4, addrspace 1)
+...
+
+---
+name: test_store_global_s24_align2
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; SI-LABEL: name: test_store_global_s24_align2
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32)
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: G_STORE [[COPY6]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
+    ; CI-LABEL: name: test_store_global_s24_align2
+    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
+    ; VI-LABEL: name: test_store_global_s24_align2
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; VI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
+    ; GFX9-LABEL: name: test_store_global_s24_align2
+    ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, align 2, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s24) = G_TRUNC %1
+    G_STORE %2, %0 :: (store 3, align 2, addrspace 1)
+...
+
+---
+name: test_store_global_s24_align1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; SI-LABEL: name: test_store_global_s24_align1
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32)
+    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; SI: G_STORE [[COPY6]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; SI: G_STORE [[COPY7]](s32), [[PTR_ADD]](p1) :: (store 1 + 1, addrspace 1)
+    ; SI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; SI: G_STORE [[COPY8]](s32), [[PTR_ADD1]](p1) :: (store 1 + 2, addrspace 1)
+    ; CI-LABEL: name: test_store_global_s24_align1
+    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 1, addrspace 1)
+    ; CI: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, addrspace 1)
+    ; VI-LABEL: name: test_store_global_s24_align1
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
+    ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16)
+    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; VI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+    ; VI: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store 1 + 1, addrspace 1)
+    ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; VI: G_STORE [[COPY6]](s32), [[PTR_ADD1]](p1) :: (store 1 + 2, addrspace 1)
+    ; GFX9-LABEL: name: test_store_global_s24_align1
+    ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32)
+    ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, align 1, addrspace 1)
+    ; GFX9: G_STORE [[LSHR]](s32), [[PTR_ADD]](p1) :: (store 1 + 2, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s24) = G_TRUNC %1
+    G_STORE %2, %0 :: (store 3, align 1, addrspace 1)
+...
+
+---
+name: test_store_global_s25_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; SI-LABEL: name: test_store_global_s25_align4
+    ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; CI-LABEL: name: test_store_global_s25_align4
+    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; VI-LABEL: name: test_store_global_s25_align4
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    ; GFX9-LABEL: name: test_store_global_s25_align4
+    ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s25) = G_TRUNC %1
+    G_STORE %2, %0 :: (store 4, align 4, addrspace 1)
+...
+
+# ---
+# name: test_store_global_s25_align2
+# body: |
+#   bb.0:
+#     liveins: $vgpr0_vgpr1, $vgpr2
+
+#     %0:_(p1) = COPY $vgpr0_vgpr1
+#     %1:_(s32) = COPY $vgpr2
+#     %2:_(s25) = G_TRUNC %1
+#     G_STORE %2, %0 :: (store 4, align 2, addrspace 1)
+# ...
+
+# ---
+# name: test_store_global_s25_align1
+# body: |
+#   bb.0:
+#     liveins: $vgpr0_vgpr1, $vgpr2
+
+#     %0:_(p1) = COPY $vgpr0_vgpr1
+#     %1:_(s32) = COPY $vgpr2
+#     %2:_(s25) = G_TRUNC %1
+#     G_STORE %2, %0 :: (store 4, align 1, addrspace 1)
+# ...
+
 ---
 name: test_store_global_s32_align1
 body: |

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index 758d5b01c978..bba490ee57da 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -929,15 +929,59 @@ body: |
     ; SI-LABEL: name: test_truncstore_global_v3s8_to_1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
-    ; SI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
-    ; VI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 1, addrspace 1)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s8>) = G_TRUNC %1
@@ -954,15 +998,59 @@ body: |
     ; SI-LABEL: name: test_truncstore_global_v3s8_to_2_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; SI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; SI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
-    ; SI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32)
+    ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
     ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
-    ; VI: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>)
-    ; VI: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
-    ; VI: G_STORE [[BITCAST]](s24), [[COPY]](p1) :: (store 2, addrspace 1)
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]]
+    ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16)
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
+    ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]]
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+    ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]]
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16)
+    ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+    ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+    ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+    ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     %2:_(<3 x s8>) = G_TRUNC %1


        


More information about the llvm-commits mailing list