[llvm] r184966 - Do not add cse-ed instructions into the visited map because we dont want to consider them as a candidate for replacement of instructions to be visited.
Nadav Rotem
nrotem at apple.com
Wed Jun 26 09:54:53 PDT 2013
Author: nadav
Date: Wed Jun 26 11:54:53 2013
New Revision: 184966
URL: http://llvm.org/viewvc/llvm-project?rev=184966&view=rev
Log:
Do not add cse-ed instructions into the visited map because we dont want to consider them as a candidate for replacement of instructions to be visited.
Modified:
llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=184966&r1=184965&r2=184966&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Jun 26 11:54:53 2013
@@ -1258,6 +1258,8 @@ Value *FuncSLP::vectorizeArith(ArrayRef<
for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
Operands[i]->replaceAllUsesWith(S);
+ Instruction *I = cast<Instruction>(Operands[i]);
+ I->eraseFromParent();
}
return Vec;
@@ -1280,7 +1282,7 @@ void FuncSLP::optimizeGatherSequence() {
// Check if it has a preheader.
BasicBlock *PreHeader = L->getLoopPreheader();
if (!PreHeader)
- return;
+ continue;
// If the vector or the element that we insert into it are
// instructions that are defined in this basic block then we can't
@@ -1310,17 +1312,19 @@ void FuncSLP::optimizeGatherSequence() {
if (!Insert || !GatherSeq.count(Insert))
continue;
- // Check if we can replace this instruction with any of the
- // visited instructions.
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
ve = Visited.end(); v != ve; ++v) {
if (Insert->isIdenticalTo(*v) &&
- DT->dominates((*v)->getParent(), Insert->getParent())) {
+ DT->dominates((*v)->getParent(), Insert->getParent())) {
Insert->replaceAllUsesWith(*v);
+ Insert = 0;
break;
}
}
- Visited.insert(Insert);
+ if (Insert)
+ Visited.insert(Insert);
}
}
}
Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll?rev=184966&r1=184965&r2=184966&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/cse.ll Wed Jun 26 11:54:53 2013
@@ -134,3 +134,87 @@ define i32 @test2(double* nocapture %G,
ret i32 undef
}
+
+;int foo(double *A, int n) {
+; A[0] = A[0] * 7.9 * n + 6.0;
+; A[1] = A[1] * 7.9 * n + 6.0;
+; A[2] = A[2] * 7.9 * n + 6.0;
+; A[3] = A[3] * 7.9 * n + 6.0;
+;}
+;CHECK: @foo4
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK-NOT: insertelement <2 x double>
+;CHECK: ret
+define i32 @foo4(double* nocapture %A, i32 %n) {
+entry:
+ %0 = load double* %A, align 8
+ %mul = fmul double %0, 7.900000e+00
+ %conv = sitofp i32 %n to double
+ %mul1 = fmul double %conv, %mul
+ %add = fadd double %mul1, 6.000000e+00
+ store double %add, double* %A, align 8
+ %arrayidx3 = getelementptr inbounds double* %A, i64 1
+ %1 = load double* %arrayidx3, align 8
+ %mul4 = fmul double %1, 7.900000e+00
+ %mul6 = fmul double %conv, %mul4
+ %add7 = fadd double %mul6, 6.000000e+00
+ store double %add7, double* %arrayidx3, align 8
+ %arrayidx9 = getelementptr inbounds double* %A, i64 2
+ %2 = load double* %arrayidx9, align 8
+ %mul10 = fmul double %2, 7.900000e+00
+ %mul12 = fmul double %conv, %mul10
+ %add13 = fadd double %mul12, 6.000000e+00
+ store double %add13, double* %arrayidx9, align 8
+ %arrayidx15 = getelementptr inbounds double* %A, i64 3
+ %3 = load double* %arrayidx15, align 8
+ %mul16 = fmul double %3, 7.900000e+00
+ %mul18 = fmul double %conv, %mul16
+ %add19 = fadd double %mul18, 6.000000e+00
+ store double %add19, double* %arrayidx15, align 8
+ ret i32 undef
+}
+
+;int partial_mrg(double *A, int n) {
+; A[0] = A[0] * n;
+; A[1] = A[1] * n;
+; if (n < 4) return 0;
+; A[2] = A[2] * n;
+; A[3] = A[3] * (n+4);
+;}
+;CHECK: @partial_mrg
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK-NOT: insertelement <2 x double>
+;CHECK: ret
+define i32 @partial_mrg(double* nocapture %A, i32 %n) {
+entry:
+ %0 = load double* %A, align 8
+ %conv = sitofp i32 %n to double
+ %mul = fmul double %conv, %0
+ store double %mul, double* %A, align 8
+ %arrayidx2 = getelementptr inbounds double* %A, i64 1
+ %1 = load double* %arrayidx2, align 8
+ %mul4 = fmul double %conv, %1
+ store double %mul4, double* %arrayidx2, align 8
+ %cmp = icmp slt i32 %n, 4
+ br i1 %cmp, label %return, label %if.end
+
+if.end: ; preds = %entry
+ %arrayidx7 = getelementptr inbounds double* %A, i64 2
+ %2 = load double* %arrayidx7, align 8
+ %mul9 = fmul double %conv, %2
+ store double %mul9, double* %arrayidx7, align 8
+ %arrayidx11 = getelementptr inbounds double* %A, i64 3
+ %3 = load double* %arrayidx11, align 8
+ %add = add nsw i32 %n, 4
+ %conv12 = sitofp i32 %add to double
+ %mul13 = fmul double %conv12, %3
+ store double %mul13, double* %arrayidx11, align 8
+ br label %return
+
+return: ; preds = %entry, %if.end
+ ret i32 0
+}
+
More information about the llvm-commits
mailing list