[llvm] [SLP]Enable splat ordering for loads (PR #115173)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 07:24:21 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
Enables splat support for loads with more than 2 lanes or more than 2 operands.
Allows better detection of load splats and reduces the number of shuffles in
some cases.
X86, AVX512, -O3+LTO
Metric: size..text
results results0 diff
test-suite :: External/SPEC/CFP2006/433.milc/433.milc.test 154867.00 156723.00 1.2%
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12467735.00 12468023.00 0.0%
Better vectorization quality
---
Full diff: https://github.com/llvm/llvm-project/pull/115173.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+11-8)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll (+6-12)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4454eb3e34d983..bf7bc570000dde 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2386,6 +2386,9 @@ class BoUpSLP {
/// the whole vector (it is mixed with constants or loop invariant values).
/// Note: This modifies the 'IsUsed' flag, so a cleanUsed() must follow.
bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
+ // Small number of loads - try load matching.
+ if (isa<LoadInst>(Op) && getNumLanes() == 2 && getNumOperands() == 2)
+ return false;
bool OpAPO = getData(OpIdx, Lane).APO;
bool IsInvariant = L && L->isLoopInvariant(Op);
unsigned Cnt = 0;
@@ -2511,23 +2514,23 @@ class BoUpSLP {
Value *OpLane0 = getValue(OpIdx, FirstLane);
// Keep track if we have instructions with all the same opcode on one
// side.
- if (isa<LoadInst>(OpLane0))
- ReorderingModes[OpIdx] = ReorderingMode::Load;
- else if (auto *OpILane0 = dyn_cast<Instruction>(OpLane0)) {
+ if (auto *OpILane0 = dyn_cast<Instruction>(OpLane0)) {
// Check if OpLane0 should be broadcast.
if (shouldBroadcast(OpLane0, OpIdx, FirstLane) ||
!canBeVectorized(OpILane0, OpIdx, FirstLane))
ReorderingModes[OpIdx] = ReorderingMode::Splat;
+ else if (isa<LoadInst>(OpILane0))
+ ReorderingModes[OpIdx] = ReorderingMode::Load;
else
ReorderingModes[OpIdx] = ReorderingMode::Opcode;
- } else if (isa<Constant>(OpLane0))
+ } else if (isa<Constant>(OpLane0)) {
ReorderingModes[OpIdx] = ReorderingMode::Constant;
- else if (isa<Argument>(OpLane0))
+ } else if (isa<Argument>(OpLane0)) {
// Our best hope is a Splat. It may save some cost in some cases.
ReorderingModes[OpIdx] = ReorderingMode::Splat;
- else
- // NOTE: This should be unreachable.
- ReorderingModes[OpIdx] = ReorderingMode::Failed;
+ } else {
+ llvm_unreachable("Unexpected value kind.");
+ }
}
// Check that we don't have same operands. No need to reorder if operands
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll
index 928cbe36554119..1e7cc9c268cfa1 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll
@@ -8,19 +8,13 @@ define fastcc void @rephase(ptr %phases_in, ptr %157, i64 %158) {
; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP0]], align 8
; CHECK-NEXT: [[IMAG_247:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 408
-; CHECK-NEXT: [[MUL35_248:%.*]] = fmul double [[TMP2]], 0.000000e+00
-; CHECK-NEXT: store double [[MUL35_248]], ptr [[IMAG_247]], align 8
-; CHECK-NEXT: [[ARRAYIDX23_1_249:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 416
-; CHECK-NEXT: [[MUL_1_250:%.*]] = fmul double [[TMP2]], 0.000000e+00
-; CHECK-NEXT: store double [[MUL_1_250]], ptr [[ARRAYIDX23_1_249]], align 8
; CHECK-NEXT: [[IMAG_1_251:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 424
-; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[IMAG_1_251]], align 8
-; CHECK-NEXT: [[MUL35_1_252:%.*]] = fmul double [[TMP2]], [[TMP3]]
-; CHECK-NEXT: store double [[MUL35_1_252]], ptr [[IMAG_1_251]], align 8
-; CHECK-NEXT: [[ARRAYIDX23_2_253:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 432
-; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX23_2_253]], align 8
-; CHECK-NEXT: [[MUL_2_254:%.*]] = fmul double [[TMP2]], [[TMP4]]
-; CHECK-NEXT: store double [[MUL_2_254]], ptr [[ARRAYIDX23_2_253]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IMAG_1_251]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <2 x double> [[TMP3]], i64 2)
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP5]], [[TMP6]]
+; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[IMAG_247]], align 8
; CHECK-NEXT: store double [[TMP2]], ptr [[PHASES_IN]], align 8
; CHECK-NEXT: ret void
;
``````````
</details>
https://github.com/llvm/llvm-project/pull/115173
More information about the llvm-commits
mailing list