[PATCH] D68654: [CVP] Replace SExt with ZExt if the input is known-non-negative

Tue Oct 8 11:16:35 PDT 2019

lebedev.ri created this revision.
lebedev.ri added reviewers: nikic, spatel, reames, dberlin.
lebedev.ri added a project: LLVM.
Herald added subscribers: jfb, hiraditya.

Zero-extension is far friendlier to further analysis than sign-extension.
While this doesn't directly help with the shift-by-signext problem, it is related to it.

This has the following effect on the test-suite (numbers collected after the middle-end module pass manager has finished):

| Statistic                            | old    | new    | delta | percent change |
| correlated-value-propagation.NumSExt | 0      | 6026   | 6026  | +100.00%       |
| instcount.NumAddInst                 | 272860 | 271283 | -1577 | -0.58%         |
| instcount.NumAllocaInst              | 27227  | 27226  | -1    | 0.00%          |
| instcount.NumAndInst                 | 63502  | 63320  | -182  | -0.29%         |
| instcount.NumAShrInst                | 13498  | 13407  | -91   | -0.67%         |
| instcount.NumAtomicCmpXchgInst       | 1159   | 1159   | 0     | 0.00%          |
| instcount.NumAtomicRMWInst           | 5036   | 5036   | 0     | 0.00%          |
| instcount.NumBitCastInst             | 672482 | 672353 | -129  | -0.02%         |
| instcount.NumBrInst                  | 702768 | 702195 | -573  | -0.08%         |
| instcount.NumCallInst                | 518285 | 518205 | -80   | -0.02%         |
| instcount.NumExtractElementInst      | 18481  | 18482  | 1     | 0.01%          |
| instcount.NumExtractValueInst        | 18290  | 18288  | -2    | -0.01%         |
| instcount.NumFAddInst                | 139035 | 138963 | -72   | -0.05%         |
| instcount.NumFCmpInst                | 10358  | 10348  | -10   | -0.10%         |
| instcount.NumFDivInst                | 30310  | 30302  | -8    | -0.03%         |
| instcount.NumFenceInst               | 387    | 387    | 0     | 0.00%          |
| instcount.NumFMulInst                | 93873  | 93806  | -67   | -0.07%         |
| instcount.NumFPExtInst               | 7148   | 7144   | -4    | -0.06%         |
| instcount.NumFPToSIInst              | 2823   | 2838   | 15    | 0.53%          |
| instcount.NumFPToUIInst              | 1251   | 1251   | 0     | 0.00%          |
| instcount.NumFPTruncInst             | 2195   | 2191   | -4    | -0.18%         |
| instcount.NumFSubInst                | 92109  | 92103  | -6    | -0.01%         |
| instcount.NumGetElementPtrInst       | 1221423      | 1219157      | -2266 | -0.19%         |
| instcount.NumICmpInst                | 479140 | 478929 | -211  | -0.04%         |
| instcount.NumIndirectBrInst          | 2      | 2      | 0     | 0.00%          |
| instcount.NumInsertElementInst       | 66089  | 66094  | 5     | 0.01%          |
| instcount.NumInsertValueInst         | 2032   | 2030   | -2    | -0.10%         |
| instcount.NumIntToPtrInst            | 19641  | 19641  | 0     | 0.00%          |
| instcount.NumInvokeInst              | 21789  | 21788  | -1    | 0.00%          |
| instcount.NumLandingPadInst          | 12051  | 12051  | 0     | 0.00%          |
| instcount.NumLoadInst                | 880079 | 878673 | -1406 | -0.16%         |
| instcount.NumLShrInst                | 25919  | 25921  | 2     | 0.01%          |
| instcount.NumMulInst                 | 42416  | 42417  | 1     | 0.00%          |
| instcount.NumOrInst                  | 100826 | 100576 | -250  | -0.25%         |
| instcount.NumPHIInst                 | 315118 | 314092 | -1026 | -0.33%         |
| instcount.NumPtrToIntInst            | 15933  | 15939  | 6     | 0.04%          |
| instcount.NumResumeInst              | 2156   | 2156   | 0     | 0.00%          |
| instcount.NumRetInst                 | 84485  | 84484  | -1    | 0.00%          |
| instcount.NumSDivInst                | 8599   | 8597   | -2    | -0.02%         |
| instcount.NumSelectInst              | 45577  | 45913  | 336   | 0.74%          |
| instcount.NumSExtInst                | 84026  | 78278  | -5748 | -6.84%         |
| instcount.NumShlInst                 | 39796  | 39726  | -70   | -0.18%         |
| instcount.NumShuffleVectorInst       | 100272 | 100292 | 20    | 0.02%          |
| instcount.NumSIToFPInst              | 29131  | 29113  | -18   | -0.06%         |
| instcount.NumSRemInst                | 1543   | 1543   | 0     | 0.00%          |
| instcount.NumStoreInst               | 805394 | 804351 | -1043 | -0.13%         |
| instcount.NumSubInst                 | 61337  | 61414  | 77    | 0.13%          |
| instcount.NumSwitchInst              | 8527   | 8524   | -3    | -0.04%         |
| instcount.NumTruncInst               | 60523  | 60484  | -39   | -0.06%         |
| instcount.NumUDivInst                | 2381   | 2381   | 0     | 0.00%          |
| instcount.NumUIToFPInst              | 5549   | 5549   | 0     | 0.00%          |
| instcount.NumUnreachableInst         | 9855   | 9855   | 0     | 0.00%          |
| instcount.NumURemInst                | 1305   | 1305   | 0     | 0.00%          |
| instcount.NumXorInst                 | 10230  | 10081  | -149  | -1.46%         |
| instcount.NumZExtInst                | 60353  | 66840  | 6487  | 10.75%         |
| instcount.TotalBlocks                | 829582 | 829004 | -578  | -0.07%         |
| instcount.TotalFuncs                 | 83818  | 83817  | -1    | 0.00%          |
| instcount.TotalInsts                 | 7316574      | 7308483      | -8091 | -0.11%         |
|

TLDR: we produce 0.11% fewer instructions, 6.84% fewer `sext`, and 10.75% more `zext`.
Note that, clearly, not all of the new `zext`s are produced by this fold.

(And now I guess it would have been interesting to measure this for D68103 <https://reviews.llvm.org/D68103> as well :S)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D68654

Files:
  llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
  llvm/test/Transforms/CorrelatedValuePropagation/sext.ll


Index: llvm/test/Transforms/CorrelatedValuePropagation/sext.ll
===================================================================

--- llvm/test/Transforms/CorrelatedValuePropagation/sext.ll
+++ llvm/test/Transforms/CorrelatedValuePropagation/sext.ll
@@ -18,9 +18,9 @@
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], 1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[EXT_WIDE:%.*]] = sext i32 [[A]] to i64
-; CHECK-NEXT:    call void @use64(i64 [[EXT_WIDE]])
-; CHECK-NEXT:    [[EXT]] = trunc i64 [[EXT_WIDE]] to i32
+; CHECK-NEXT:    [[EXT_WIDE1:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT:    call void @use64(i64 [[EXT_WIDE1]])
+; CHECK-NEXT:    [[EXT]] = trunc i64 [[EXT_WIDE1]] to i32
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -85,9 +85,9 @@
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]]
 ; CHECK:       bb:
-; CHECK-NEXT:    [[EXT_WIDE:%.*]] = sext i32 [[N]] to i64
-; CHECK-NEXT:    call void @use64(i64 [[EXT_WIDE]])
-; CHECK-NEXT:    [[EXT:%.*]] = trunc i64 [[EXT_WIDE]] to i32
+; CHECK-NEXT:    [[EXT_WIDE1:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:    call void @use64(i64 [[EXT_WIDE1]])
+; CHECK-NEXT:    [[EXT:%.*]] = trunc i64 [[EXT_WIDE1]] to i32
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
Index: llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -62,6 +62,7 @@
 STATISTIC(NumUDivs,     "Number of udivs whose width was decreased");
 STATISTIC(NumAShrs,     "Number of ashr converted to lshr");
 STATISTIC(NumSRems,     "Number of srem converted to urem");
+STATISTIC(NumSExt,      "Number of sext converted to zext");
 STATISTIC(NumOverflows, "Number of overflow checks removed");
 STATISTIC(NumSaturating,
     "Number of saturating arithmetics converted to normal arithmetics");
@@ -637,6 +638,27 @@
   return true;
 }
 
+static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
+  if (SDI->getType()->isVectorTy())
+    return false;
+
+  Value *Base = SDI->getOperand(0);
+
+  Constant *Zero = ConstantInt::get(Base->getType(), 0);
+  if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, Base, Zero, SDI) !=
+      LazyValueInfo::True)
+    return false;
+
+  ++NumSExt;
+  auto *ZExt =
+      CastInst::CreateZExtOrBitCast(Base, SDI->getType(), SDI->getName(), SDI);
+  ZExt->setDebugLoc(SDI->getDebugLoc());
+  SDI->replaceAllUsesWith(ZExt);
+  SDI->eraseFromParent();
+
+  return true;
+}
+
 static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
   using OBO = OverflowingBinaryOperator;
 
@@ -745,6 +767,9 @@
       case Instruction::AShr:
         BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
         break;
+      case Instruction::SExt:
+        BBChanged |= processSExt(cast<SExtInst>(II), LVI);
+        break;
       case Instruction::Add:
       case Instruction::Sub:
         BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D68654.223904.patch
Type: text/x-patch
Size: 3250 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191008/99a720e4/attachment.bin>