[llvm] bb82c79 - [SLP] Enable optimization of freeze instructions (#102217)
Author: John McIver
Date: 2024-08-07T15:01:37-04:00
New Revision: bb82c79d3b6b3c062890c2cb01fcb183d4f50d27
URL: https://github.com/llvm/llvm-project/commit/bb82c79d3b6b3c062890c2cb01fcb183d4f50d27
DIFF: https://github.com/llvm/llvm-project/commit/bb82c79d3b6b3c062890c2cb01fcb183d4f50d27.diff
LOG: [SLP] Enable optimization of freeze instructions (#102217)
Allow SLP optimization to progress in the presence of freeze instructions.
Prior to this commit, freeze instructions blocked SLP optimization.
The following Alive2 proof demonstrates the correctness of the addsub_freeze test:
https://alive2.llvm.org/ce/z/qm38oh
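A minimal sketch of the pattern this change enables (the function name, globals,
and RUN flags below are illustrative, modeled on the X86 SLP tests; they are not
part of this commit). Before this patch the two freezes kept the store pair
scalar; with it, SLP should emit a single <2 x double> freeze:

; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx
@src = global [2 x double] zeroinitializer, align 8
@dst = global [2 x double] zeroinitializer, align 8

define void @freeze_2f64() {
  %a0 = load double, ptr @src, align 8
  %a1 = load double, ptr getelementptr inbounds ([2 x double], ptr @src, i32 0, i64 1), align 8
  %f0 = freeze double %a0
  %f1 = freeze double %a1
  store double %f0, ptr @dst, align 8
  store double %f1, ptr getelementptr inbounds ([2 x double], ptr @dst, i32 0, i64 1), align 8
  ret void
}
; Expected (sketch): %1 = load <2 x double>, ptr @src
;                    %2 = freeze <2 x double> %1
;                    store <2 x double> %2, ptr @dst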
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4186b17e644b0..1dc291ebddc0d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6493,6 +6493,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ case Instruction::Freeze:
return TreeEntry::Vectorize;
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
@@ -7330,7 +7331,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor: {
+ case Instruction::Xor:
+ case Instruction::Freeze: {
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndices);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
@@ -10118,6 +10120,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
};
return GetCostDiff(GetScalarCost, GetVectorCost);
}
+ case Instruction::Freeze:
+ return CommonCost;
default:
llvm_unreachable("Unknown instruction");
}
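A note on the cost hunk above: freeze is typically modeled as free by the cost
model (it lowers to at most a register copy), so the vectorized entry charges
only CommonCost, the extract/shuffle overhead every tree entry shares. A rough
sketch of the accounting (illustrative, not actual cost-model output):

; scalar form:  4 x (freeze i32)        -> ~0 per lane
; vector form:  1 x (freeze <4 x i32>)  -> ~0, plus CommonCost for any
;               extracts/shuffles needed by external users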
@@ -13390,6 +13394,24 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
return V;
}
+ case Instruction::Freeze: {
+ setInsertPointAfterBundle(E);
+
+ Value *Op = vectorizeOperand(E, 0, PostponedPHIs);
+
+ if (E->VectorizedValue) {
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+ return E->VectorizedValue;
+ }
+
+ Value *V = Builder.CreateFreeze(Op);
+ V = FinalShuffle(V, E, VecTy);
+
+ E->VectorizedValue = V;
+ ++NumVectorInstructions;
+
+ return V;
+ }
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
index 530643a029a56..f7bd2431a7605 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -68,42 +68,17 @@ entry:
define void @addsub_freeze() #0 {
; CHECK-LABEL: @addsub_freeze(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @b, align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @c, align 4
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @d, align 4
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @e, align 4
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[ADD1]]
-; CHECK-NEXT: [[FREEZE_ADD2:%.*]] = freeze i32 [[ADD2]]
-; CHECK-NEXT: store i32 [[FREEZE_ADD2]], ptr @a, align 4
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
-; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
-; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[ADD3]], [[ADD4]]
-; CHECK-NEXT: [[FREEZE_SUB:%.*]] = freeze i32 [[SUB]]
-; CHECK-NEXT: store i32 [[FREEZE_SUB]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
-; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
-; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[ADD6]]
-; CHECK-NEXT: [[FREEZE_ADD7:%.*]] = freeze i32 [[ADD7]]
-; CHECK-NEXT: store i32 [[FREEZE_ADD7]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
-; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
-; CHECK-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[SUB10:%.*]] = sub nsw i32 [[ADD8]], [[ADD9]]
-; CHECK-NEXT: [[FREEZE_SUB10:%.*]] = freeze i32 [[SUB10]]
-; CHECK-NEXT: store i32 [[FREEZE_SUB10]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @c, align 4
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @d, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr @e, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = freeze <4 x i32> [[TMP8]]
+; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @a, align 4
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
index 400d1ac38faba..1804ef5e37833 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll
@@ -48,18 +48,12 @@ define void @fmuladd_2f64() #0 {
define void @fmuladd_2f64_freeze() #0 {
; CHECK-LABEL: @fmuladd_2f64_freeze(
-; CHECK-NEXT: [[A0:%.*]] = load double, ptr @srcA64, align 8
-; CHECK-NEXT: [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[B0:%.*]] = load double, ptr @srcB64, align 8
-; CHECK-NEXT: [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[C0:%.*]] = load double, ptr @srcC64, align 8
-; CHECK-NEXT: [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
-; CHECK-NEXT: [[FMULADD0:%.*]] = call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[C0]])
-; CHECK-NEXT: [[FMULADD1:%.*]] = call double @llvm.fmuladd.f64(double [[A1]], double [[B1]], double [[C1]])
-; CHECK-NEXT: [[FREEZE0:%.*]] = freeze double [[FMULADD0]]
-; CHECK-NEXT: [[FREEZE1:%.*]] = freeze double [[FMULADD1]]
-; CHECK-NEXT: store double [[FREEZE0]], ptr @dst64, align 8
-; CHECK-NEXT: store double [[FREEZE1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr @srcB64, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr @srcC64, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <2 x double> [[TMP4]]
+; CHECK-NEXT: store <2 x double> [[TMP5]], ptr @dst64, align 8
; CHECK-NEXT: ret void
;
%a0 = load double, ptr @srcA64, align 8