[llvm] [AMDGPU] narrow only on store to pow of 2 mem location (PR #150093)
Tiger Ding via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 11:48:23 PDT 2025
https://github.com/zerogtiger updated https://github.com/llvm/llvm-project/pull/150093
From 63409b6f4ab53bbe14968059adf338834ab5692d Mon Sep 17 00:00:00 2001
From: Tiger Ding <zerogtiger at gmail.com>
Date: Tue, 22 Jul 2025 14:17:45 -0500
Subject: [PATCH 1/6] [AMDGPU] narrow only on store to pow of 2 mem location
This LLVM defect was identified via the AMD Fuzzing project.
---
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 20 +-
.../GlobalISel/legalize-store-global.mir | 84 ++++---
.../AMDGPU/GlobalISel/legalize-store.mir | 8 +-
.../AMDGPU/GlobalISel/store-weird-size.ll | 224 ++++++++++++++++++
4 files changed, 289 insertions(+), 47 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index aa678df675fb6..b68d603c0b0df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1618,11 +1618,21 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// May need relegalization for the scalars.
return std::pair(0, EltTy);
})
- .minScalar(0, S32)
- .narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32))
- .widenScalarToNextPow2(0)
- .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
- .lower();
+ .minScalar(0, S32)
+ // Only narrow to the memory size if it is a power of 2.
+ .narrowScalarIf(
+ [=](const LegalityQuery &Query) -> bool {
+ unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+ return isWideScalarExtLoadTruncStore(0)(Query) &&
+ isPowerOf2_64(MemSize);
+ },
+ [=](const LegalityQuery &Query) {
+ unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+ return std::make_pair(0, LLT::scalar(MemSize));
+ })
+ .widenScalarToNextPow2(0)
+ .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
+ .lower();
}
// FIXME: Unaligned accesses not lowered.
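For intuition: the removed rule narrowed any wide scalar ext-load/trunc-store to s32, while the replacement narrows the value type to the memory type itself, and only when the memory width is a power of two; everything else now falls through to widenScalarToNextPow2 and the generic lowering. A minimal standalone sketch of that decision (plain C++; isPowerOf2_64 here is a local stand-in for the LLVM utility, and classify is a hypothetical helper, not LLVM code):

  #include <cstdint>
  #include <cstdio>

  // Local stand-in for llvm::isPowerOf2_64 (zero is not a power of two).
  static bool isPowerOf2_64(uint64_t V) { return V && (V & (V - 1)) == 0; }

  // Hypothetical helper mirroring the new predicate: a wide truncating
  // store narrows the value type directly to the memory width only when
  // that width is a power of two.
  static void classify(unsigned ValueBits, unsigned MemBits) {
    if (ValueBits > 32 && MemBits < ValueBits && isPowerOf2_64(MemBits))
      printf("s%u value, s%u mem: narrow value type to s%u\n", ValueBits,
             MemBits, MemBits);
    else
      printf("s%u value, s%u mem: widen/lower path\n", ValueBits, MemBits);
  }

  int main() {
    classify(64, 32); // power-of-two memory width: narrow the value to s32
    classify(64, 48); // 48 is not a power of two: leave it to widen/lower
    classify(64, 56); // likewise for 56
  }
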
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index 1080b7dcb53b1..679819b0af2fa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -886,33 +886,34 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
+ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32)
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+ ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C5]]
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C4]](s32)
; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
- ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+ ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; SI-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
- ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
- ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32)
+ ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY4]](s32)
; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD1]], [[C6]](s64)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
- ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+ ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C5]]
- ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY4]](s32)
+ ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY5]](s32)
; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C6]](s64)
; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
@@ -922,11 +923,12 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1)
@@ -936,22 +938,23 @@ body: |
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
+ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32)
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
+ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s64)
; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C4]](s16)
; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
- ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+ ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD2]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
@@ -960,11 +963,11 @@ body: |
; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into unknown-address + 2, addrspace 1)
; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
; VI-NEXT: G_STORE [[ANYEXT1]](s32), [[PTR_ADD3]](p1) :: (store (s8) into unknown-address + 3, addrspace 1)
- ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
- ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
- ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C4]](s16)
+ ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
+ ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C4]](s16)
; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C5]](s64)
- ; VI-NEXT: G_STORE [[TRUNC3]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
+ ; VI-NEXT: G_STORE [[TRUNC4]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 4, addrspace 1)
; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
; VI-NEXT: G_STORE [[ANYEXT2]](s32), [[PTR_ADD4]](p1) :: (store (s8) into unknown-address + 5, addrspace 1)
;
@@ -973,11 +976,12 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 1, addrspace 1)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, align 1, addrspace 1)
@@ -998,17 +1002,18 @@ body: |
; SI-NEXT: {{ $}}
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+ ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+ ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
+ ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32)
; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; SI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+ ; SI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
; SI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -1018,11 +1023,12 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+ ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
; CI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
; CI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
; CI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -1032,17 +1038,18 @@ body: |
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+ ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
+ ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C2]](s32)
; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; VI-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
+ ; VI-NEXT: G_STORE [[COPY3]](s32), [[COPY]](p1) :: (store (s16), addrspace 1)
; VI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 2, addrspace 1)
; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
; VI-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
@@ -1052,11 +1059,12 @@ body: |
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64)
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C]](s32)
+ ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY2]], [[C]](s32)
; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
; GFX9-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s32), align 2, addrspace 1)
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
; GFX9-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 4, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index 22d792abe3624..19dc3e339678e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -285,13 +285,13 @@ body: |
; VI-NEXT: {{ $}}
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
- ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
- ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
+ ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
+ ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C]](s16)
+ ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C]](s16)
; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; VI-NEXT: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
+ ; VI-NEXT: G_STORE [[TRUNC1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
; VI-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p1) :: (store (s8) into unknown-address + 1, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
new file mode 100644
index 0000000000000..f1c7dae66d613
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O0 -global-isel=true -stop-after=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s
+
+define void @store_i48(ptr addrspace(1) %ptr, i48 %arg) #0 {
+ ; UNPACKED-LABEL: name: store_i48
+ ; UNPACKED: bb.1 (%ir-block.0):
+ ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; UNPACKED-NEXT: {{ $}}
+ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY4]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[C1]](s64)
+ ; UNPACKED-NEXT: G_STORE [[COPY2]](s32), [[MV]](p1) :: (store (s32) into %ir.ptr, addrspace 1)
+ ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s16) into %ir.ptr + 4, align 4, addrspace 1)
+ ; UNPACKED-NEXT: SI_RETURN
+ store i48 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+define void @store_i55(ptr addrspace(1) %ptr, i55 %arg) #0 {
+ ; UNPACKED-LABEL: name: store_i55
+ ; UNPACKED: bb.1 (%ir-block.0):
+ ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; UNPACKED-NEXT: {{ $}}
+ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36028797018963967
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]]
+ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND]](s64)
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY4]], [[C1]](s32)
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[C2]](s64)
+ ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY4]](s64)
+ ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[MV]](p1) :: (store (s32) into %ir.ptr, addrspace 1)
+ ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; UNPACKED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC1]], [[C3]](s32)
+ ; UNPACKED-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; UNPACKED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; UNPACKED-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p1) :: (store (s16) into %ir.ptr + 4, align 4, addrspace 1)
+ ; UNPACKED-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into %ir.ptr + 6, align 2, basealign 4, addrspace 1)
+ ; UNPACKED-NEXT: SI_RETURN
+ store i55 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+define void @store_i56(ptr addrspace(1) %ptr, i56 %arg) #0 {
+ ; UNPACKED-LABEL: name: store_i56
+ ; UNPACKED: bb.1 (%ir-block.0):
+ ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; UNPACKED-NEXT: {{ $}}
+ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV1]](s64)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY4]], [[C]](s32)
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[C1]](s64)
+ ; UNPACKED-NEXT: G_STORE [[COPY2]](s32), [[MV]](p1) :: (store (s32) into %ir.ptr, addrspace 1)
+ ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C2]](s32)
+ ; UNPACKED-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; UNPACKED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64)
+ ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s16) into %ir.ptr + 4, align 4, addrspace 1)
+ ; UNPACKED-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into %ir.ptr + 6, align 2, basealign 4, addrspace 1)
+ ; UNPACKED-NEXT: SI_RETURN
+ store i56 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+define void @store_i65(ptr addrspace(1) %ptr, i65 %arg) #0 {
+ ; UNPACKED-LABEL: name: store_i65
+ ; UNPACKED: bb.1 (%ir-block.0):
+ ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; UNPACKED-NEXT: {{ $}}
+ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; UNPACKED-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[DEF]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]]
+ ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]]
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[C2]](s64)
+ ; UNPACKED-NEXT: G_STORE [[AND]](s64), [[MV]](p1) :: (store (s64) into %ir.ptr, align 4, addrspace 1)
+ ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+ ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s8) into %ir.ptr + 8, align 4, addrspace 1)
+ ; UNPACKED-NEXT: SI_RETURN
+ store i65 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+define void @store_i95(ptr addrspace(1) %ptr, i95 %arg) #0 {
+ ; UNPACKED-LABEL: name: store_i95
+ ; UNPACKED: bb.1 (%ir-block.0):
+ ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; UNPACKED-NEXT: {{ $}}
+ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; UNPACKED-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[DEF]](s32)
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2147483647
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]]
+ ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]]
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[C2]](s64)
+ ; UNPACKED-NEXT: G_STORE [[AND]](s64), [[MV]](p1) :: (store (s64) into %ir.ptr, align 4, addrspace 1)
+ ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+ ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s32) into %ir.ptr + 8, addrspace 1)
+ ; UNPACKED-NEXT: SI_RETURN
+ store i95 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+define void @store_i96(ptr addrspace(1) %ptr, i96 %arg) #0 {
+ ; UNPACKED-LABEL: name: store_i96
+ ; UNPACKED: bb.1 (%ir-block.0):
+ ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; UNPACKED-NEXT: {{ $}}
+ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[MV1]](s96)
+ ; UNPACKED-NEXT: G_STORE [[BITCAST]](<3 x s32>), [[MV]](p1) :: (store (<3 x s32>) into %ir.ptr, align 4, addrspace 1)
+ ; UNPACKED-NEXT: SI_RETURN
+ store i96 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+define void @store_i97(ptr addrspace(1) %ptr, i97 %arg) #0 {
+ ; UNPACKED-LABEL: name: store_i97
+ ; UNPACKED: bb.1 (%ir-block.0):
+ ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; UNPACKED-NEXT: {{ $}}
+ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
+ ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; UNPACKED-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]]
+ ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]]
+ ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; UNPACKED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[C2]](s64)
+ ; UNPACKED-NEXT: G_STORE [[AND]](s64), [[MV]](p1) :: (store (s64) into %ir.ptr, align 4, addrspace 1)
+ ; UNPACKED-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C3]](s32)
+ ; UNPACKED-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; UNPACKED-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
+ ; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s32) into %ir.ptr + 8, addrspace 1)
+ ; UNPACKED-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; UNPACKED-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD1]](p1) :: (store (s8) into %ir.ptr + 12, align 4, addrspace 1)
+ ; UNPACKED-NEXT: SI_RETURN
+ store i97 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+define void @store_i127(ptr addrspace(1) %ptr, i127 %arg) #0 {
+ ; UNPACKED-LABEL: name: store_i127
+ ; UNPACKED: bb.1 (%ir-block.0):
+ ; UNPACKED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; UNPACKED-NEXT: {{ $}}
+ ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; UNPACKED-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
+ ; UNPACKED-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; UNPACKED-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C]]
+ ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV2]], [[C1]]
+ ; UNPACKED-NEXT: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64)
+ ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[MV3]](s128)
+ ; UNPACKED-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[MV]](p1) :: (store (<4 x s32>) into %ir.ptr, align 4, addrspace 1)
+ ; UNPACKED-NEXT: SI_RETURN
+ store i127 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
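
All of the scalar expectations above follow one pattern: the value is stored as a descending sequence of power-of-two chunks (i48 as s32+s16, i55/i56 as s32+s16+s8, i65 as s64+s8, i97 as s64+s32+s8), while i96 and i127 instead take the vector path as a single <3 x s32> / <4 x s32> store. A hedged sketch of the scalar split (hypothetical helper, byte-granular like the emitted stores; not LLVM code):

  #include <cstdio>

  // Hypothetical illustration of the scalar decomposition: round the bit
  // width up to bytes, then peel off the largest power-of-two chunk
  // (capped at s64) at each step.
  static void splitStore(unsigned Bits) {
    unsigned Bytes = (Bits + 7) / 8; // stores are byte-granular
    unsigned Offset = 0;
    while (Bytes > 0) {
      unsigned Chunk = 1;
      while (Chunk * 2 <= Bytes && Chunk < 8)
        Chunk *= 2;
      printf("  store s%u into %%ir.ptr + %u\n", Chunk * 8, Offset);
      Offset += Chunk;
      Bytes -= Chunk;
    }
  }

  int main() {
    for (unsigned Bits : {48u, 55u, 65u, 97u}) {
      printf("i%u:\n", Bits);
      splitStore(Bits);
    }
  }

Running this reproduces the offsets in the CHECK lines, e.g. i55 yields store s32 at +0, s16 at +4, s8 at +6.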
From 18bb7c6dc1f1984f3dbc4fc5ede7284af791c587 Mon Sep 17 00:00:00 2001
From: Tiger Ding <zerogtiger at gmail.com>
Date: Thu, 24 Jul 2025 09:15:34 -0500
Subject: [PATCH 2/6] Lit test indentation fix
---
llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
index f1c7dae66d613..67cfc860def57 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
@@ -139,7 +139,7 @@ define void @store_i95(ptr addrspace(1) %ptr, i95 %arg) #0 {
; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s32) into %ir.ptr + 8, addrspace 1)
; UNPACKED-NEXT: SI_RETURN
store i95 %arg, ptr addrspace(1) %ptr, align 4
- ret void
+ ret void
}
define void @store_i96(ptr addrspace(1) %ptr, i96 %arg) #0 {
From 228d359f3845b7f48108e66c1e43ceb37bc6e320 Mon Sep 17 00:00:00 2001
From: Tiger Ding <zerogtiger at gmail.com>
Date: Thu, 24 Jul 2025 09:33:07 -0500
Subject: [PATCH 3/6] Removed -verify-machineinstrs
---
llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
index 67cfc860def57..be2e4221ceb5e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O0 -global-isel=true -stop-after=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=UNPACKED %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -O0 -global-isel=true -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s
define void @store_i48(ptr addrspace(1) %ptr, i48 %arg) #0 {
; UNPACKED-LABEL: name: store_i48
From 3529343d6fe404c2743b3bc4ff7a8c5f65eeb601 Mon Sep 17 00:00:00 2001
From: Tiger Ding <zerogtiger at gmail.com>
Date: Thu, 24 Jul 2025 09:37:26 -0500
Subject: [PATCH 4/6] Indent fix (again)
---
.../AMDGPU/GlobalISel/store-weird-size.ll | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
index be2e4221ceb5e..732488d2048bc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-weird-size.ll
@@ -21,8 +21,8 @@ define void @store_i48(ptr addrspace(1) %ptr, i48 %arg) #0 {
; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s16) into %ir.ptr + 4, align 4, addrspace 1)
; UNPACKED-NEXT: SI_RETURN
- store i48 %arg, ptr addrspace(1) %ptr, align 4
- ret void
+ store i48 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
}
define void @store_i55(ptr addrspace(1) %ptr, i55 %arg) #0 {
@@ -110,8 +110,8 @@ define void @store_i65(ptr addrspace(1) %ptr, i65 %arg) #0 {
; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s8) into %ir.ptr + 8, align 4, addrspace 1)
; UNPACKED-NEXT: SI_RETURN
- store i65 %arg, ptr addrspace(1) %ptr, align 4
- ret void
+ store i65 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
}
define void @store_i95(ptr addrspace(1) %ptr, i95 %arg) #0 {
@@ -138,8 +138,8 @@ define void @store_i95(ptr addrspace(1) %ptr, i95 %arg) #0 {
; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64)
; UNPACKED-NEXT: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p1) :: (store (s32) into %ir.ptr + 8, addrspace 1)
; UNPACKED-NEXT: SI_RETURN
- store i95 %arg, ptr addrspace(1) %ptr, align 4
- ret void
+ store i95 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
}
define void @store_i96(ptr addrspace(1) %ptr, i96 %arg) #0 {
@@ -217,8 +217,8 @@ define void @store_i127(ptr addrspace(1) %ptr, i127 %arg) #0 {
; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[MV3]](s128)
; UNPACKED-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[MV]](p1) :: (store (<4 x s32>) into %ir.ptr, align 4, addrspace 1)
; UNPACKED-NEXT: SI_RETURN
- store i127 %arg, ptr addrspace(1) %ptr, align 4
- ret void
+ store i127 %arg, ptr addrspace(1) %ptr, align 4
+ ret void
}
attributes #0 = { nounwind }
From d13f0c0064adac8bae8e3e1ccc7a4b9a8a9e8ea1 Mon Sep 17 00:00:00 2001
From: Tiger Ding <zerogtiger at gmail.com>
Date: Mon, 28 Jul 2025 12:05:49 -0500
Subject: [PATCH 5/6] Refactored narrow functions
---
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 30 ++++++++++++-------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index b68d603c0b0df..cd2e4a7aa8c6d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -137,6 +138,14 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
};
}
+// Returns the scalar type with the same size as the memory type of the access.
+static LegalizeMutation getScalarTypeFromMemDesc(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+ return std::make_pair(TypeIdx, LLT::scalar(MemSize));
+ };
+}
+
// Increase the number of vector elements to reach the next legal RegClass.
static LegalizeMutation moreElementsToNextExistingRegClass(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
@@ -384,6 +393,15 @@ static LegalityPredicate isWideScalarExtLoadTruncStore(unsigned TypeIdx) {
};
}
+// True if we have a truncating store or an extending load with a data size
+// larger than 32 bits and a memory size that is a power of 2.
+static LegalityPredicate isTruncStoreToSizePowerOf2(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+ return isWideScalarExtLoadTruncStore(TypeIdx)(Query) && isPowerOf2_64(MemSize);
+ };
+}
+
// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
// handle some operations by just promoting the register during
// selection. There are also d16 loads on GFX9+ which preserve the high bits.
@@ -1619,17 +1637,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return std::pair(0, EltTy);
})
.minScalar(0, S32)
- // Only narrow to the memory size if it is a power of 2.
- .narrowScalarIf(
- [=](const LegalityQuery &Query) -> bool {
- unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
- return isWideScalarExtLoadTruncStore(0)(Query) &&
- isPowerOf2_64(MemSize);
- },
- [=](const LegalityQuery &Query) {
- unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
- return std::make_pair(0, LLT::scalar(MemSize));
- })
+ .narrowScalarIf(isTruncStoreToSizePowerOf2(0), getScalarTypeFromMemDesc(0))
.widenScalarToNextPow2(0)
.moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
.lower();
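
Both helpers are just closures capturing TypeIdx, in the same style as the existing moreEltsToNext32Bit. A stripped-down model of the mutation's shape (hypothetical simplified Query and type stand-ins, not LLVM's real LegalityQuery or LLT; the real mutation returns std::pair<unsigned, LLT>, where the sketch uses a bare bit width):

  #include <cstdio>
  #include <functional>
  #include <utility>

  // Hypothetical simplified stand-ins, just to show the closure shape
  // used by getScalarTypeFromMemDesc.
  struct Query { unsigned MemSizeInBits; };
  using Mutation =
      std::function<std::pair<unsigned, unsigned>(const Query &)>;

  static Mutation scalarTypeFromMemDesc(unsigned TypeIdx) {
    return [=](const Query &Q) {
      // Narrow type index TypeIdx to a scalar as wide as the memory access.
      return std::make_pair(TypeIdx, Q.MemSizeInBits);
    };
  }

  int main() {
    Mutation M = scalarTypeFromMemDesc(0);
    auto [Idx, Bits] = M(Query{16});
    printf("type index %u -> s%u\n", Idx, Bits); // type index 0 -> s16
  }
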
From a008f282a20d19cd3411ab343d01cee63d8fddcf Mon Sep 17 00:00:00 2001
From: Tiger Ding <zerogtiger at gmail.com>
Date: Mon, 28 Jul 2025 13:47:44 -0500
Subject: [PATCH 6/6] Indentation fix (again again)
---
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index cd2e4a7aa8c6d..3bcc07e3ab49a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1636,11 +1636,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// May need relegalization for the scalars.
return std::pair(0, EltTy);
})
- .minScalar(0, S32)
- .narrowScalarIf(isTruncStoreToSizePowerOf2(0), getScalarTypeFromMemDesc(0))
- .widenScalarToNextPow2(0)
- .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
- .lower();
+ .minScalar(0, S32)
+ .narrowScalarIf(isTruncStoreToSizePowerOf2(0), getScalarTypeFromMemDesc(0))
+ .widenScalarToNextPow2(0)
+ .moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
+ .lower();
}
// FIXME: Unaligned accesses not lowered.