[PATCH] D93406: [VectorCombine] optimize alignment for load transform
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 16 12:26:55 PST 2020
This revision was automatically updated to reflect the committed changes.
Closed by commit rG38ebc1a13dc8: [VectorCombine] optimize alignment for load transform (authored by spatel).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D93406/new/
https://reviews.llvm.org/D93406
Files:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/load.ll
Index: llvm/test/Transforms/VectorCombine/X86/load.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/X86/load.ll
+++ llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -175,7 +175,7 @@
define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_f32_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[R]]
;
@@ -201,7 +201,7 @@
define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_i32_insert_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x i32> [[R]]
;
@@ -434,7 +434,7 @@
define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_i32_insert_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <8 x i32> [[R]]
;
@@ -458,7 +458,7 @@
define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_f32_insert_v16f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x float> [[R]]
;
@@ -470,7 +470,7 @@
define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_f32_insert_v2f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <2 x i32> <i32 0, i32 undef>
; CHECK-NEXT: ret <2 x float> [[R]]
;
@@ -525,7 +525,7 @@
define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[R]]
;
@@ -538,7 +538,7 @@
define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16
; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <4 x float> [[R]]
;
Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -143,7 +143,8 @@
return false;
// Original pattern: insertelt undef, load [free casts of] PtrOp, 0
- Align Alignment = Load->getAlign();
+ // Use the greater of the alignment on the load or its source pointer.
+ Align Alignment = std::max(SrcPtr->getPointerAlignment(DL), Load->getAlign());
Type *LoadTy = Load->getType();
int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D93406.312281.patch
Type: text/x-patch
Size: 5056 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201216/a302be4f/attachment.bin>
More information about the llvm-commits
mailing list