[llvm] r304849 - [CGP / PowerPC] use direct compares if there's only one load per block in memcmp() expansion

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 6 17:17:08 PDT 2017


Author: spatel
Date: Tue Jun  6 19:17:08 2017
New Revision: 304849

URL: http://llvm.org/viewvc/llvm-project?rev=304849&view=rev
Log:
[CGP / PowerPC] use direct compares if there's only one load per block in memcmp() expansion

I'd like to enable CGP memcmp expansion for x86, but the output from CGP would regress the 
special cases (memcmp(x,y,N) != 0 for N=1,2,4,8,16,32 bytes) that we already handle.

I'm not sure if we'll actually be able to produce the optimal code given the block-at-a-time
limitation in the DAG. We might have to just avoid those special cases here in CGP. But
regardless of that, I think this is a win for the more general cases.

http://rise4fun.com/Alive/cbQ

Differential Revision: https://reviews.llvm.org/D33963

Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=304849&r1=304848&r2=304849&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Tue Jun  6 19:17:08 2017
@@ -1812,7 +1812,7 @@ void MemCmpExpansion::emitLoadCompareBlo
   unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
 
   Builder.SetInsertPoint(LoadCmpBlocks[Index]);
-
+  Value *Cmp = nullptr;
   for (unsigned i = 0; i < NumLoads; ++i) {
     unsigned LoadSize = getLoadSize(RemainingBytes);
     unsigned GEPIndex = NumBytesProcessed / LoadSize;
@@ -1846,9 +1846,16 @@ void MemCmpExpansion::emitLoadCompareBlo
       LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);
       LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType);
     }
-    Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
-    Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
-    XorList.push_back(Diff);
+    if (NumLoads != 1) {
+      // If we have multiple loads per block, we need to generate a composite
+      // comparison using xor+or.
+      Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+      Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType);
+      XorList.push_back(Diff);
+    } else {
+      // If there's only one load per block, we just compare the loaded values.
+      Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+    }
   }
 
   auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
@@ -1862,16 +1869,17 @@ void MemCmpExpansion::emitLoadCompareBlo
     return OutList;
   };
 
-  // Pairwise OR the XOR results.
-  OrList = pairWiseOr(XorList);
-
-  // Pairwise OR the OR results until one result left.
-  while (OrList.size() != 1) {
-    OrList = pairWiseOr(OrList);
+  if (!Cmp) {
+    // Pairwise OR the XOR results.
+    OrList = pairWiseOr(XorList);
+
+    // Pairwise OR the OR results until one result left.
+    while (OrList.size() != 1) {
+      OrList = pairWiseOr(OrList);
+    }
+    Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
   }
 
-  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, OrList[0],
-                                  ConstantInt::get(Diff->getType(), 0));
   BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
                            ? EndBlock
                            : LoadCmpBlocks[Index + 1];

Modified: llvm/trunk/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll?rev=304849&r1=304848&r2=304849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll Tue Jun  6 19:17:08 2017
@@ -21,8 +21,7 @@ define signext i32 @zeroEqualityTest02(i
 ; CHECK-NEXT:    lwz 3, 0(3)
 ; CHECK-NEXT:    lwz 4, 0(4)
 ; CHECK-NEXT:    li 5, 1
-; CHECK-NEXT:    xor 3, 3, 4
-; CHECK-NEXT:    cmplwi 3, 0
+; CHECK-NEXT:    cmpld 3, 4
 ; CHECK-NEXT:    isel 3, 0, 5, 2
 ; CHECK-NEXT:    clrldi 3, 3, 32
 ; CHECK-NEXT:    blr
@@ -38,19 +37,19 @@ define signext i32 @zeroEqualityTest01(i
 ; CHECK:       # BB#0: # %loadbb
 ; CHECK-NEXT:    ld 5, 0(3)
 ; CHECK-NEXT:    ld 6, 0(4)
-; CHECK-NEXT:    xor. 5, 5, 6
+; CHECK-NEXT:    cmpld 5, 6
 ; CHECK-NEXT:    bne 0, .LBB1_2
 ; CHECK-NEXT:  # BB#1: # %loadbb1
 ; CHECK-NEXT:    ld 3, 8(3)
 ; CHECK-NEXT:    ld 4, 8(4)
-; CHECK-NEXT:    xor. 3, 3, 4
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    beq 0, .LBB1_3
 ; CHECK-NEXT:  .LBB1_2: # %res_block
 ; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:    clrldi 3, 3, 32
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB1_3:
-; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:  .LBB1_3: # %endblock
 ; CHECK-NEXT:    clrldi 3, 3, 32
 ; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16)
@@ -65,27 +64,24 @@ define signext i32 @zeroEqualityTest03(i
 ; CHECK:       # BB#0: # %loadbb
 ; CHECK-NEXT:    lwz 5, 0(3)
 ; CHECK-NEXT:    lwz 6, 0(4)
-; CHECK-NEXT:    xor 5, 5, 6
-; CHECK-NEXT:    cmplwi 5, 0
+; CHECK-NEXT:    cmpld 5, 6
 ; CHECK-NEXT:    bne 0, .LBB2_3
 ; CHECK-NEXT:  # BB#1: # %loadbb1
 ; CHECK-NEXT:    lhz 5, 4(3)
 ; CHECK-NEXT:    lhz 6, 4(4)
-; CHECK-NEXT:    xor 5, 5, 6
-; CHECK-NEXT:    rlwinm. 5, 5, 0, 16, 31
+; CHECK-NEXT:    cmpld 5, 6
 ; CHECK-NEXT:    bne 0, .LBB2_3
 ; CHECK-NEXT:  # BB#2: # %loadbb2
 ; CHECK-NEXT:    lbz 3, 6(3)
 ; CHECK-NEXT:    lbz 4, 6(4)
-; CHECK-NEXT:    xor 3, 3, 4
-; CHECK-NEXT:    rlwinm. 3, 3, 0, 24, 31
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    beq 0, .LBB2_4
 ; CHECK-NEXT:  .LBB2_3: # %res_block
 ; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:    clrldi 3, 3, 32
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB2_4:
-; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:  .LBB2_4: # %endblock
 ; CHECK-NEXT:    clrldi 3, 3, 32
 ; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7)
@@ -178,24 +174,22 @@ define signext i32 @zeroEqualityTest06()
 ; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha
 ; CHECK-NEXT:    ld 3, .LzeroEqualityTest04.buffer1@toc@l(3)
 ; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer2@toc@l(4)
-; CHECK-NEXT:    xor. 3, 3, 4
+; CHECK-NEXT:    cmpld 3, 4
 ; CHECK-NEXT:    bne 0, .LBB5_2
 ; CHECK-NEXT:  # BB#1: # %loadbb1
 ; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
 ; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8
 ; CHECK-NEXT:    ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3)
 ; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4)
-; CHECK-NEXT:    xor. 3, 3, 4
-; CHECK-NEXT:    beq 0, .LBB5_4
+; CHECK-NEXT:    cmpld 3, 4
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    beq 0, .LBB5_3
 ; CHECK-NEXT:  .LBB5_2: # %res_block
 ; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:  .LBB5_3: # %endblock
 ; CHECK-NEXT:    cntlzw 3, 3
 ; CHECK-NEXT:    srwi 3, 3, 5
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB5_4:
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    b .LBB5_3
   %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
   %not.tobool = icmp eq i32 %call, 0
   %cond = zext i1 %not.tobool to i32




More information about the llvm-commits mailing list