[llvm] r322972 - [ARM] Fix perf regression in compare optimization.

Joel Galenson via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 19 09:46:27 PST 2018


Author: jgalenson
Date: Fri Jan 19 09:46:27 2018
New Revision: 322972

URL: http://llvm.org/viewvc/llvm-project?rev=322972&view=rev
Log:
[ARM] Fix perf regression in compare optimization.

Fix a performance regression caused by r322737.

While trying to make it easier to replace compares with existing adds and
subtracts, I accidentally stopped the compare optimization from doing so in
some cases.  This commit fixes that.  It also fixes another potential bug
introduced by r322737.

Differential Revision: https://reviews.llvm.org/D42263

Modified:
    llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/trunk/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll

Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=322972&r1=322971&r2=322972&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Fri Jan 19 09:46:27 2018
@@ -2736,7 +2736,7 @@ bool ARMBaseInstrInfo::optimizeCompareIn
     }
     I = CmpInstr;
     E = MI;
-  } else {
+  } else if (E != B) {
     // Allow the loop below to search E (which was initially MI).  Since MI and
     // SubAdd have different tests, even if that instruction could not be MI, it
     // could still potentially be SubAdd.
@@ -2763,8 +2763,7 @@ bool ARMBaseInstrInfo::optimizeCompareIn
       return false;
 
     if (I == B)
-      // The 'and' is below the comparison instruction.
-      return false;
+      break;
   }
 
   // Return false if no candidates exist.

Modified: llvm/trunk/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll?rev=322972&r1=322971&r2=322972&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll Fri Jan 19 09:46:27 2018
@@ -197,6 +197,38 @@ cont1:
 
 declare void @external_fn(...) local_unnamed_addr #0
 
+define i32 @are_equal(i32* nocapture readonly %a1, i32* nocapture readonly %a2, i32 %n) local_unnamed_addr #0 {
+; CHECK-LABEL: are_equal
+; CHECK: subs r{{[0-9]+}}, r{{[0-9]+}}, #1
+; CHECK-NEXT: bne
+entry:
+  %tobool7 = icmp eq i32 %n, 0
+  br i1 %tobool7, label %while.end, label %land.rhs.preheader
+
+land.rhs.preheader:
+  br label %land.rhs
+
+while.cond:
+  %tobool = icmp eq i32 %dec9, 0
+  br i1 %tobool, label %while.end, label %land.rhs
+
+land.rhs:
+  %dec9.in = phi i32 [ %dec9, %while.cond ], [ %n, %land.rhs.preheader ]
+  %dec9 = add nsw i32 %dec9.in, -1
+  %arrayidx = getelementptr inbounds i32, i32* %a1, i32 %dec9
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %a2, i32 %dec9
+  %1 = load i32, i32* %arrayidx1, align 4
+  %cmp = icmp eq i32 %0, %1
+  br i1 %cmp, label %while.cond, label %while.end
+
+while.end:
+  %n.addr.0.lcssa = phi i32 [ 0, %entry ], [ 0, %while.cond ], [ %dec9.in, %land.rhs ]
+  %cmp2 = icmp slt i32 %n.addr.0.lcssa, 1
+  %conv = zext i1 %cmp2 to i32
+  ret i32 %conv
+}
+
 declare void @llvm.trap() #2
 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1




More information about the llvm-commits mailing list