[PATCH] D34071: [CGP, PowerPC] try to constant fold before creating loads for memcmp expansion

Sanjay Patel via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 9 16:40:17 PDT 2017


spatel created this revision.
Herald added a subscriber: mcrosier.

I think this is the last step needed to avoid regressions for x86 before we flip the switch to allow expansion of the smallest set of memcmp() via CGP. The DAG version checks for constant strings, so we need to do that here too.

FWIW, the two-constant test is not handled by LibCallSimplifier::optimizeMemCmp() because that code is limited to 8-bit constant arrays. LibCallSimplifier will also fail to optimize some one-constant tests because its alignment requirements are too strict (it shouldn't require alignment for a constant operand).


https://reviews.llvm.org/D34071

Files:
  lib/CodeGen/CodeGenPrepare.cpp
  test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll


Index: test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
===================================================================
--- test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -168,26 +168,8 @@
 ; Validate with memcmp()?:
 define signext i32 @equalityFoldTwoConstants() {
 ; CHECK-LABEL: equalityFoldTwoConstants:
-; CHECK:       # BB#0: # %loadbb
-; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha
-; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha
-; CHECK-NEXT:    ld 3, .LzeroEqualityTest04.buffer1@toc@l(3)
-; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer2@toc@l(4)
-; CHECK-NEXT:    cmpld 3, 4
-; CHECK-NEXT:    bne 0, .LBB5_2
-; CHECK-NEXT:  # BB#1: # %loadbb1
-; CHECK-NEXT:    addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
-; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8
-; CHECK-NEXT:    ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3)
-; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4)
-; CHECK-NEXT:    cmpld 3, 4
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    beq 0, .LBB5_3
-; CHECK-NEXT:  .LBB5_2: # %res_block
+; CHECK:       # BB#0: # %endblock
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB5_3: # %endblock
-; CHECK-NEXT:    cntlzw 3, 3
-; CHECK-NEXT:    srwi 3, 3, 5
 ; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
   %not.tobool = icmp eq i32 %call, 0
@@ -198,16 +180,17 @@
 define signext i32 @equalityFoldOneConstant(i8* %X) {
 ; CHECK-LABEL: equalityFoldOneConstant:
 ; CHECK:       # BB#0: # %loadbb
-; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha
+; CHECK-NEXT:    li 4, 1
 ; CHECK-NEXT:    ld 5, 0(3)
-; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer1@toc@l(4)
-; CHECK-NEXT:    cmpld 4, 5
+; CHECK-NEXT:    sldi 4, 4, 32
+; CHECK-NEXT:    cmpld 5, 4
 ; CHECK-NEXT:    bne 0, .LBB6_2
 ; CHECK-NEXT:  # BB#1: # %loadbb1
-; CHECK-NEXT:    addis 4, 2, .LzeroEqualityTest04.buffer1@toc@ha+8
+; CHECK-NEXT:    li 4, 3
 ; CHECK-NEXT:    ld 3, 8(3)
-; CHECK-NEXT:    ld 4, .LzeroEqualityTest04.buffer1@toc@l+8(4)
-; CHECK-NEXT:    cmpld 4, 3
+; CHECK-NEXT:    sldi 4, 4, 32
+; CHECK-NEXT:    ori 4, 4, 2
+; CHECK-NEXT:    cmpld 3, 4
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    beq 0, .LBB6_3
 ; CHECK-NEXT:  .LBB6_2: # %res_block
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -1851,9 +1851,19 @@
                                   ConstantInt::get(LoadSizeType, GEPIndex));
     }
 
-    // Load LoadSizeType from the base address.
-    Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-    Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+    // Get a constant or load a value for each source address.
+    Value *LoadSrc1 = nullptr;
+    if (auto *Source1C = dyn_cast<Constant>(Source1))
+      LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
+    if (!LoadSrc1)
+      LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
+
+    Value *LoadSrc2 = nullptr;
+    if (auto *Source2C = dyn_cast<Constant>(Source2))
+      LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
+    if (!LoadSrc2)
+      LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+
     if (NumLoads != 1) {
       if (LoadSizeType != MaxLoadType) {
         LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D34071.102094.patch
Type: text/x-patch
Size: 3629 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170609/ebd3ee27/attachment.bin>


More information about the llvm-commits mailing list