[llvm] 3d25fdc - [AArch64][GlobalISel] Allow vector store legalization into 128-bit-wide types
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 27 14:53:51 PST 2021
Author: Jessica Paquette
Date: 2021-01-27T14:46:44-08:00
New Revision: 3d25fdc5c21f174d38ac78dd01ccaf6eec655bc0
URL: https://github.com/llvm/llvm-project/commit/3d25fdc5c21f174d38ac78dd01ccaf6eec655bc0
DIFF: https://github.com/llvm/llvm-project/commit/3d25fdc5c21f174d38ac78dd01ccaf6eec655bc0.diff
LOG: [AArch64][GlobalISel] Allow vector store legalization into 128-bit-wide types
We are allowed to store 128-bit-wide values using the q registers on AArch64.
GlobalISel was instead clamping vector stores to at most 64 bits' worth of
elements (2 x s32 or 1 x s64).
This results in poor codegen for stores like the ones below:
https://godbolt.org/z/E56dq8
```
; SDAG uses stp with q registers in both cases here.
define void @float(<16 x float> %val, <16 x float>* %ptr) {
store <16 x float> %val, <16 x float>* %ptr
ret void
}
define void @double(<8 x double> %val, <8 x double>* %ptr) {
store <8 x double> %val, <8 x double>* %ptr
ret void
}
```
This raises the s32 and s64 element-count clamps to 128 bits and adds matching 128-bit clamps for vector stores with s8 and s16 elements.
Differential Revision: https://reviews.llvm.org/D95107
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index ed6ccb6ef7e2..6069b1051da9 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -319,8 +319,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return Query.Types[0].isScalar() &&
Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampMaxNumElements(0, s32, 2)
- .clampMaxNumElements(0, s64, 1)
+ // Maximum: sN * k = 128
+ .clampMaxNumElements(0, s8, 16)
+ .clampMaxNumElements(0, s16, 8)
+ .clampMaxNumElements(0, s32, 4)
+ .clampMaxNumElements(0, s64, 2)
.customIf(IsPtrVecPred);
// Constants
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir
index 11d9b2624f9f..3ef8e24d181e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fpext.mir
@@ -19,18 +19,10 @@ body: |
; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
; CHECK: [[FPEXT:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV]](<2 x s32>)
; CHECK: [[FPEXT1:%[0-9]+]]:_(<2 x s64>) = G_FPEXT [[UV1]](<2 x s32>)
- ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT]](<2 x s64>)
- ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FPEXT1]](<2 x s64>)
- ; CHECK: G_STORE [[UV2]](s64), [[COPY1]](p0) :: (store 8, align 32)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK: G_STORE [[FPEXT]](<2 x s64>), [[COPY1]](p0) :: (store 16, align 32)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
- ; CHECK: G_STORE [[UV3]](s64), [[PTR_ADD]](p0) :: (store 8 + 8)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
- ; CHECK: G_STORE [[UV4]](s64), [[PTR_ADD1]](p0) :: (store 8 + 16, align 16)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
- ; CHECK: G_STORE [[UV5]](s64), [[PTR_ADD2]](p0) :: (store 8 + 24)
+ ; CHECK: G_STORE [[FPEXT1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 + 16)
; CHECK: RET_ReallyLR
%0:_(<4 x s32>) = COPY $q0
%1:_(p0) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
index 381bd03cf19c..a2ae79974c1f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
@@ -115,16 +115,12 @@ body: |
; CHECK: [[FPTRUNC2:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY2]](<2 x s64>)
; CHECK: [[FPTRUNC3:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY3]](<2 x s64>)
; CHECK: [[COPY5:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK: G_STORE [[FPTRUNC]](<2 x s32>), [[COPY5]](p0) :: (store 8, align 32)
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC]](<2 x s32>), [[FPTRUNC1]](<2 x s32>)
+ ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC2]](<2 x s32>), [[FPTRUNC3]](<2 x s32>)
+ ; CHECK: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY5]](p0) :: (store 16, align 32)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C]](s64)
- ; CHECK: G_STORE [[FPTRUNC1]](<2 x s32>), [[PTR_ADD]](p0) :: (store 8 + 8)
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C1]](s64)
- ; CHECK: G_STORE [[FPTRUNC2]](<2 x s32>), [[PTR_ADD1]](p0) :: (store 8 + 16, align 16)
- ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
- ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C2]](s64)
- ; CHECK: G_STORE [[FPTRUNC3]](<2 x s32>), [[PTR_ADD2]](p0) :: (store 8 + 24)
+ ; CHECK: G_STORE [[CONCAT_VECTORS1]](<4 x s32>), [[PTR_ADD]](p0) :: (store 16 + 16)
; CHECK: RET_ReallyLR
%2:_(<2 x s64>) = COPY $q0
%3:_(<2 x s64>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
index 61104c6e432e..45732f6c9c7f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -302,3 +302,96 @@ body: |
G_STORE %1(<8 x s8>), %0(p0) :: (store 8)
RET_ReallyLR
...
+---
+name: store_32xs8
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $x0
+ ; CHECK-LABEL: name: store_32xs8
+ ; CHECK: liveins: $x0
+ ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK: %ptr:_(p0) = COPY $x0
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s8>), %ptr(p0) :: (store 16, align 32)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
+ ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s8>), [[PTR_ADD]](p0) :: (store 16 + 16)
+ ; CHECK: RET_ReallyLR
+ %val:_(<32 x s8>) = G_IMPLICIT_DEF
+ %ptr:_(p0) = COPY $x0
+ G_STORE %val(<32 x s8>), %ptr(p0) :: (store 32)
+ RET_ReallyLR
+...
+---
+name: store_16xs16
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $x0
+ ; CHECK-LABEL: name: store_16xs16
+ ; CHECK: liveins: $x0
+ ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK: %ptr:_(p0) = COPY $x0
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s16>), %ptr(p0) :: (store 16, align 32)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
+ ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s16>), [[PTR_ADD]](p0) :: (store 16 + 16)
+ ; CHECK: RET_ReallyLR
+ %val:_(<16 x s16>) = G_IMPLICIT_DEF
+ %ptr:_(p0) = COPY $x0
+ G_STORE %val(<16 x s16>), %ptr(p0) :: (store 32)
+ RET_ReallyLR
+...
+---
+name: store_8xs32
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $x0
+ ; CHECK-LABEL: name: store_8xs32
+ ; CHECK: liveins: $x0
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: %ptr:_(p0) = COPY $x0
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), %ptr(p0) :: (store 16, align 32)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
+ ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p0) :: (store 16 + 16)
+ ; CHECK: RET_ReallyLR
+ %val:_(<8 x s32>) = G_IMPLICIT_DEF
+ %ptr:_(p0) = COPY $x0
+ G_STORE %val(<8 x s32>), %ptr(p0) :: (store 32)
+ RET_ReallyLR
+...
+---
+name: store_4xs64
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.1:
+ liveins: $x0
+ ; CHECK-LABEL: name: store_4xs64
+ ; CHECK: liveins: $x0
+ ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK: %ptr:_(p0) = COPY $x0
+ ; CHECK: G_STORE [[DEF]](<2 x s64>), %ptr(p0) :: (store 16, align 32)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
+ ; CHECK: G_STORE [[DEF]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 + 16)
+ ; CHECK: RET_ReallyLR
+ %val:_(<4 x s64>) = G_IMPLICIT_DEF
+ %ptr:_(p0) = COPY $x0
+ G_STORE %val(<4 x s64>), %ptr(p0) :: (store 32)
+ RET_ReallyLR
More information about the llvm-commits
mailing list