[llvm] 9637848 - [GlobalISel] Fix non-pow-2 legalization of s56 stores.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 16 13:30:12 PDT 2021
Author: Amara Emerson
Date: 2021-07-16T13:29:49-07:00
New Revision: 9637848f51af0e770c6d19b6aa020250bf1d0c0f
URL: https://github.com/llvm/llvm-project/commit/9637848f51af0e770c6d19b6aa020250bf1d0c0f
DIFF: https://github.com/llvm/llvm-project/commit/9637848f51af0e770c6d19b6aa020250bf1d0c0f.diff
LOG: [GlobalISel] Fix non-pow-2 legalization of s56 stores.
s56 stores are broken down into s32 + s24 stores. During this step
both of those new stores use an anyextended s64 value, resulting in
truncating stores. With s56, the s24 store requires another round of
lowering to make it legal, and we were crashing because we didn't
expect non-pow-2 stores to also be truncating.
Differential Revision: https://reviews.llvm.org/D106183
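To make the recursion concrete, here is a minimal standalone C++ sketch
(not LLVM code; splitStore and powerOf2Floor are hypothetical helpers) of
how a non-power-of-2 store width is split into power-of-2 pieces, mirroring
the LargeSplitSize/SmallSplitSize computation in
LegalizerHelper::lowerStore: s56 becomes s32 + s24, and the s24 piece is
lowered again into s16 + s8.

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Largest power of 2 <= Bits (assumes Bits > 0).
static uint64_t powerOf2Floor(uint64_t Bits) {
  uint64_t P = 1;
  while (P * 2 <= Bits)
    P *= 2;
  return P;
}

// Split a store of `Bits` bits at byte offset `Offset` into power-of-2
// pieces. Each piece corresponds to one truncating G_STORE of the
// (possibly any-extended or truncated) source value.
static void splitStore(uint64_t Bits, uint64_t Offset,
                       std::vector<std::pair<uint64_t, uint64_t>> &Pieces) {
  if ((Bits & (Bits - 1)) == 0) { // already a power of 2
    Pieces.push_back({Bits, Offset});
    return;
  }
  uint64_t Large = powerOf2Floor(Bits); // e.g. 32 for s56
  uint64_t Small = Bits - Large;        // e.g. 24; may need another round
  Pieces.push_back({Large, Offset});
  splitStore(Small, Offset + Large / 8, Pieces);
}

int main() {
  std::vector<std::pair<uint64_t, uint64_t>> Pieces;
  splitStore(56, 0, Pieces);
  for (auto &P : Pieces)
    std::printf("store s%llu at byte offset +%llu\n",
                (unsigned long long)P.first, (unsigned long long)P.second);
}

Running this prints the same s32/s16/s8 pieces at offsets +0/+4/+6 that the
store_i56 MIR test added below checks for.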
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 7b4a9578e444..8a54b87043a9 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2947,15 +2947,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
if (isPowerOf2_32(MemTy.getSizeInBits()))
return UnableToLegalize; // Don't know what we're being asked to do.
- // Extend to the next pow-2.
- const LLT ExtendTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
- auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+ // Extend to the next pow-2. If this store was itself the result of lowering,
+ // e.g. an s56 store being broken into s32 + s24, we might have a stored type
+  // that's wider than the stored size.
+ const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
+ auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
// Obtain the smaller value by shifting away the larger value.
uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
- uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
- auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
- auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+ uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
+ auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
+ auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
// Generate the PtrAdd and truncating stores.
LLT PtrTy = MRI.getType(PtrReg);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
index a334322d6b69..6bb84fb34884 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
@@ -1,22 +1,11 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=aarch64 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s
---- |
- target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
- target triple = "aarch64"
-
- define i32 @load_store_test(i24* %ptr, i24* %ptr2) {
- %val = load i24, i24* %ptr
- store i24 %val, i24* %ptr2
- ret i32 0
- }
-
-...
---
name: load_store_test
alignment: 4
tracksRegLiveness: true
body: |
- bb.1 (%ir-block.0):
+ bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: load_store_test
@@ -24,26 +13,60 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16) from %ir.ptr, align 4)
+ ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 4)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from %ir.ptr + 2, align 2, basealign 4)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2, align 2)
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s64)
; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s64)
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
- ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16) into %ir.ptr2, align 4)
- ; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into %ir.ptr2 + 2, align 2, basealign 4)
+ ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16), align 4)
+ ; CHECK: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 2, align 2)
; CHECK: $w0 = COPY [[C]](s32)
; CHECK: RET_ReallyLR implicit $w0
%0:_(p0) = COPY $x0
%1:_(p0) = COPY $x1
%3:_(s32) = G_CONSTANT i32 0
- %2:_(s24) = G_LOAD %0(p0) :: (load (s24) from %ir.ptr, align 4)
- G_STORE %2(s24), %1(p0) :: (store (s24) into %ir.ptr2, align 4)
+ %2:_(s24) = G_LOAD %0(p0) :: (load (s24), align 4)
+ G_STORE %2(s24), %1(p0) :: (store (s24), align 4)
$w0 = COPY %3(s32)
RET_ReallyLR implicit $w0
...
+---
+name: store_i56
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: store_i56
+ ; CHECK: liveins: $x0
+ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[C]](s64)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+ ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C1]](s64)
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; CHECK: G_STORE [[COPY1]](s64), [[COPY]](p0) :: (store (s32), align 8)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
+ ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C3]](s64)
+ ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64)
+ ; CHECK: G_STORE [[TRUNC]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 4, align 4)
+ ; CHECK: G_STORE [[LSHR1]](s32), [[PTR_ADD1]](p0) :: (store (s8) into unknown-address + 6, align 2)
+ ; CHECK: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %1:_(s56) = G_CONSTANT i56 32
+ G_STORE %1(s56), %0(p0) :: (store (s56), align 8)
+ RET_ReallyLR
+
+...