[PATCH] D42263: [ARM] Fix perf regression in compare optimization.
Joel Galenson via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 18 16:26:48 PST 2018
jgalenson updated this revision to Diff 130518.
jgalenson edited the summary of this revision.
jgalenson added a comment.
This guards against the potential bug Eli pointed out.
This isn't a perfect solution: if E is the first instruction in the block, the loop won't search it (and the loop structure is confusing). Hopefully we can fix that in a follow-up commit.
https://reviews.llvm.org/D42263
Files:
lib/Target/ARM/ARMBaseInstrInfo.cpp
test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
Index: test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
===================================================================
--- test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
+++ test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
@@ -197,6 +197,38 @@
declare void @external_fn(...) local_unnamed_addr #0
+define i32 @are_equal(i32* nocapture readonly %a1, i32* nocapture readonly %a2, i32 %n) local_unnamed_addr #0 {
+; CHECK-LABEL: are_equal
+; CHECK: subs r{{[0-9]+}}, r{{[0-9]+}}, #1
+; CHECK-NEXT: bne
+entry:
+ %tobool7 = icmp eq i32 %n, 0
+ br i1 %tobool7, label %while.end, label %land.rhs.preheader
+
+land.rhs.preheader:
+ br label %land.rhs
+
+while.cond:
+ %tobool = icmp eq i32 %dec9, 0
+ br i1 %tobool, label %while.end, label %land.rhs
+
+land.rhs:
+ %dec9.in = phi i32 [ %dec9, %while.cond ], [ %n, %land.rhs.preheader ]
+ %dec9 = add nsw i32 %dec9.in, -1
+ %arrayidx = getelementptr inbounds i32, i32* %a1, i32 %dec9
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %a2, i32 %dec9
+ %1 = load i32, i32* %arrayidx1, align 4
+ %cmp = icmp eq i32 %0, %1
+ br i1 %cmp, label %while.cond, label %while.end
+
+while.end:
+ %n.addr.0.lcssa = phi i32 [ 0, %entry ], [ 0, %while.cond ], [ %dec9.in, %land.rhs ]
+ %cmp2 = icmp slt i32 %n.addr.0.lcssa, 1
+ %conv = zext i1 %cmp2 to i32
+ ret i32 %conv
+}
+
declare void @llvm.trap() #2
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
Index: lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2736,7 +2736,7 @@
}
I = CmpInstr;
E = MI;
- } else {
+ } else if (E != B) {
// Allow the loop below to search E (which was initially MI). Since MI and
// SubAdd have different tests, even if that instruction could not be MI, it
// could still potentially be SubAdd.
@@ -2764,7 +2764,7 @@
if (I == B)
// The 'and' is below the comparison instruction.
- return false;
+ break;
}
// Return false if no candidates exist.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D42263.130518.patch
Type: text/x-patch
Size: 2219 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180119/35d0dd94/attachment.bin>
More information about the llvm-commits
mailing list