[llvm] 0bcfafc - [SeparateConstOffsetFromGEP] Fix: sext(a) + sext(b) -> sext(a + b) matches add and sub instructions with one another

Kevin P. Neal via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 17 09:23:57 PST 2020


Author: Drew Wock
Date: 2020-01-17T12:22:52-05:00
New Revision: 0bcfafc5e71d4f636d456317a3a2e6fd903d4755

URL: https://github.com/llvm/llvm-project/commit/0bcfafc5e71d4f636d456317a3a2e6fd903d4755
DIFF: https://github.com/llvm/llvm-project/commit/0bcfafc5e71d4f636d456317a3a2e6fd903d4755.diff

LOG: [SeparateConstOffsetFromGEP] Fix: sext(a) + sext(b) -> sext(a + b) matches add and sub instructions with one another

During the SeparateConstOffsetFromGEP pass, signed extensions are distributed
to the values that feed into them and then later recombined. The recombination
stage is somewhat problematic- it doesn't differ add and sub instructions
from another when matching the sext(a) +/- sext(b) -> sext(a +/- b) pattern
in some instances.

An example- the IR contains:
%unextendedA
%unextendedB
%subuAuB = unextendedA - unextendedB
%extA = extend A
%extB = extend B
%addeAeB = extA + extB

The problematic optimization will transform that into:

%unextendedA
%unextendedB
%subuAuB = unextendedA - unextendedB
%extA = extend A
%extB = extend B
%addeAeB = extend subuAuB ; Obviously not semantically equivalent to the IR input.

This patch fixes that.

Patch by Drew Wock <drew.wock at sas.com>
Differential Revision: https://reviews.llvm.org/D65967

Added: 
    llvm/test/Transforms/SeparateConstOffsetFromGEP/test-add-sub-separation.ll

Modified: 
    llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 2a1a040bf83e..0eca6704b496 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -431,8 +431,10 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
   bool reuniteExts(Instruction *I);
 
   /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
-  Instruction *findClosestMatchingDominator(const SCEV *Key,
-                                            Instruction *Dominatee);
+  Instruction *findClosestMatchingDominator(
+      const SCEV *Key, Instruction *Dominatee,
+      DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);
+
   /// Verify F is free of dead code.
   void verifyNoDeadCode(Function &F);
 
@@ -456,7 +458,8 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
   /// multiple GEPs with a single index.
   bool LowerGEP;
 
-  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
+  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
+  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
 };
 
 } // end anonymous namespace
@@ -1141,7 +1144,8 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
 }
 
 Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
-    const SCEV *Key, Instruction *Dominatee) {
+    const SCEV *Key, Instruction *Dominatee,
+    DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
   auto Pos = DominatingExprs.find(Key);
   if (Pos == DominatingExprs.end())
     return nullptr;
@@ -1169,12 +1173,23 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
   // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
   // TODO: handle zext
   Value *LHS = nullptr, *RHS = nullptr;
-  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
-      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
     if (LHS->getType() == RHS->getType()) {
       const SCEV *Key =
           SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
-      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
+        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+        NewSExt->takeName(I);
+        I->replaceAllUsesWith(NewSExt);
+        RecursivelyDeleteTriviallyDeadInstructions(I);
+        return true;
+      }
+    }
+  } else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+    if (LHS->getType() == RHS->getType()) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
         Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
         NewSExt->takeName(I);
         I->replaceAllUsesWith(NewSExt);
@@ -1185,12 +1200,17 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
   }
 
   // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
-  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
-      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
+    if (programUndefinedIfFullPoison(I)) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      DominatingAdds[Key].push_back(I);
+    }
+  } else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
     if (programUndefinedIfFullPoison(I)) {
       const SCEV *Key =
           SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
-      DominatingExprs[Key].push_back(I);
+      DominatingSubs[Key].push_back(I);
     }
   }
   return false;
@@ -1198,7 +1218,8 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
 
 bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
   bool Changed = false;
-  DominatingExprs.clear();
+  DominatingAdds.clear();
+  DominatingSubs.clear();
   for (const auto Node : depth_first(DT)) {
     BasicBlock *BB = Node->getBlock();
     for (auto I = BB->begin(); I != BB->end(); ) {

diff  --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/test-add-sub-separation.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/test-add-sub-separation.ll
new file mode 100644
index 000000000000..c814c04efae6
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/test-add-sub-separation.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -separate-const-offset-from-gep < %s | FileCheck %s
+
+define void @matchingExtensions(i32* %ap, i32* %bp, i64* %result) {
+; CHECK-LABEL: @matchingExtensions(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[AP:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[BP:%.*]]
+; CHECK-NEXT:    [[EB:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT:    [[SUBAB:%.*]] = sub nsw i32 [[A]], [[B]]
+; CHECK-NEXT:    [[EA:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT:    [[ADDEAEB:%.*]] = add nsw i64 [[EA]], [[EB]]
+; CHECK-NEXT:    [[EXTSUB:%.*]] = sext i32 [[SUBAB]] to i64
+; CHECK-NEXT:    [[IDX:%.*]] = getelementptr i32, i32* [[AP]], i64 [[EXTSUB]]
+; CHECK-NEXT:    store i64 [[ADDEAEB]], i64* [[RESULT:%.*]]
+; CHECK-NEXT:    store i32 [[SUBAB]], i32* [[IDX]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = load i32, i32* %ap
+  %b = load i32, i32* %bp
+  %eb = sext i32 %b to i64
+  %subab = sub nsw i32 %a, %b
+  %ea = sext i32 %a to i64
+  %addeaeb = add nsw i64 %ea, %eb
+  %extsub = sext i32 %subab to i64
+  %idx = getelementptr i32, i32* %ap, i64 %extsub
+  store i64 %addeaeb, i64* %result
+  store i32 %subab, i32* %idx
+  ret void
+}


        


More information about the llvm-commits mailing list