[llvm] 9237fde - [CGP] Prevent optimizePhiType from iterating forever

Sun Sep 13 08:11:14 PDT 2020

Author: David Green
Date: 2020-09-13T16:11:01+01:00
New Revision: 9237fde48139400764377eb73e7e5d3bc5b7fffc

URL: https://github.com/llvm/llvm-project/commit/9237fde48139400764377eb73e7e5d3bc5b7fffc
DIFF: https://github.com/llvm/llvm-project/commit/9237fde48139400764377eb73e7e5d3bc5b7fffc.diff

LOG: [CGP] Prevent optimizePhiType from iterating forever

The recently added optimizePhiType algorithm had no checks to make sure
it didn't continually iterate backward and forth between float and int
types. This means that given an input like store(phi(bitcast(load))), we
could convert that back and forth to store(bitcast(phi(load))). This
particular case would usually have been simplified to a different load
type (folding the bitcast into the load) before CGP, but other cases can
occur. The one that came up was phi(bitcast(phi)), where the two phi's
of different types were bitcast between. That was not helped by a dead
bitcast being kept around which could make conversion look profitable.

This adds an extra check of the bitcast Uses or Defs, to make sure that
at least one is grounded and will not end up being converted back. It
also makes sure that dead bitcasts are removed, and there is a minor
change to include newly created Phi nodes in the Visited set so that
they do not need to be revisited.

Differential Revision: https://reviews.llvm.org/D82676

Added: 
    

Modified: 
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/test/CodeGen/AArch64/convertphitype.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 3e5dceccf49b..529975c33ec1 100644

--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5807,6 +5807,12 @@ bool CodeGenPrepare::optimizePhiType(
   Visited.insert(I);
   SmallPtrSet<Instruction *, 4> Defs;
   SmallPtrSet<Instruction *, 4> Uses;
+  // This works by adding extra bitcasts between load/stores and removing
+  // existing bicasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
+  // we can get in the situation where we remove a bitcast in one iteration
+  // just to add it again in the next. We need to ensure that at least one
+  // bitcast we remove are anchored to something that will not change back.
+  bool AnyAnchored = false;
 
   while (!Worklist.empty()) {
     Instruction *II = Worklist.pop_back_val();
@@ -5840,9 +5846,12 @@ bool CodeGenPrepare::optimizePhiType(
           if (!Defs.count(OpBC)) {
             Defs.insert(OpBC);
             Worklist.push_back(OpBC);
+            AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
+                           !isa<ExtractElementInst>(OpBC->getOperand(0));
           }
-        } else if (!isa<UndefValue>(V))
+        } else if (!isa<UndefValue>(V)) {
           return false;
+        }
       }
     }
 
@@ -5866,12 +5875,15 @@ bool CodeGenPrepare::optimizePhiType(
         if (OpBC->getType() != ConvertTy)
           return false;
         Uses.insert(OpBC);
-      } else
+        AnyAnchored |=
+            any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
+      } else {
         return false;
+      }
     }
   }
 
-  if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+  if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
     return false;
 
   LLVM_DEBUG(dbgs() << "Converting " << *I << "\n  and connected nodes to "
@@ -5882,11 +5894,13 @@ bool CodeGenPrepare::optimizePhiType(
   ValueToValueMap ValMap;
   ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
   for (Instruction *D : Defs) {
-    if (isa<BitCastInst>(D))
+    if (isa<BitCastInst>(D)) {
       ValMap[D] = D->getOperand(0);
-    else
+      DeletedInstrs.insert(D);
+    } else {
       ValMap[D] =
           new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+    }
   }
   for (PHINode *Phi : PhiNodes)
     ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
@@ -5897,15 +5911,17 @@ bool CodeGenPrepare::optimizePhiType(
     for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
       NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
                           Phi->getIncomingBlock(i));
+    Visited.insert(NewPhi);
   }
   // And finally pipe up the stores and bitcasts
   for (Instruction *U : Uses) {
     if (isa<BitCastInst>(U)) {
       DeletedInstrs.insert(U);
       U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
-    } else
+    } else {
       U->setOperand(0,
                     new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+    }
   }
 
   // Save the removed phis to be deleted later.

diff  --git a/llvm/test/CodeGen/AArch64/convertphitype.ll b/llvm/test/CodeGen/AArch64/convertphitype.ll
index bb82ea2905c1..2e3530de378b 100644
--- a/llvm/test/CodeGen/AArch64/convertphitype.ll
+++ b/llvm/test/CodeGen/AArch64/convertphitype.ll
@@ -70,14 +70,13 @@ define float @convphi3(i32 *%s, i32 *%d, i32 %n, float %f) {
 ; CHECK-LABEL: @convphi3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    [[FB:%.*]] = bitcast float [[F:%.*]] to i32
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
 ; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F:%.*]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret float [[PHI_TC]]
 ;
 entry:
@@ -99,14 +98,13 @@ define void @convphi4(i32 *%s, i32 *%d, i32 %n, float %f) {
 ; CHECK-LABEL: @convphi4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    [[FB:%.*]] = bitcast float [[F:%.*]] to i32
 ; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]]
 ; CHECK:       then:
 ; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
 ; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[F:%.*]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[BC:%.*]] = bitcast float [[PHI_TC]] to i32
 ; CHECK-NEXT:    store i32 [[BC]], i32* [[D:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -481,6 +479,201 @@ end:
   ret float %b
 }
 
+define void @convphi_stop(i32 *%s, i32 *%d, float *%e, i32 %n) {
+; CHECK-LABEL: @convphi_stop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
+; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
+; CHECK-NEXT:    store float [[B]], float* [[E:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp15 = icmp sgt i32 %n, 0
+  br i1 %cmp15, label %then, label %else
+
+then:
+  %ls = load i32, i32* %s, align 4
+  br label %end
+
+else:
+  %ld = load i32, i32* %d, align 4
+  br label %end
+
+end:
+  %phi = phi i32 [ %ls, %then ], [ %ld, %else ]
+  %b = bitcast i32 %phi to float
+  store float %b, float* %e, align 4
+  ret void
+}
+
+define void @convphi_stop2(i32 *%s, i32 *%d, float *%e, i32 %n) {
+; CHECK-LABEL: @convphi_stop2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LSB:%.*]] = bitcast i32 [[LS]] to float
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LDB:%.*]] = bitcast i32 [[LD]] to float
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI:%.*]] = phi float [ [[LSB]], [[THEN]] ], [ [[LDB]], [[ELSE]] ]
+; CHECK-NEXT:    store float [[PHI]], float* [[E:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp15 = icmp sgt i32 %n, 0
+  br i1 %cmp15, label %then, label %else
+
+then:
+  %ls = load i32, i32* %s, align 4
+  %lsb = bitcast i32 %ls to float
+  br label %end
+
+else:
+  %ld = load i32, i32* %d, align 4
+  %ldb = bitcast i32 %ld to float
+  br label %end
+
+end:
+  %phi = phi float [ %lsb, %then ], [ %ldb, %else ]
+  store float %phi, float* %e, align 4
+  ret void
+}
+
+define float @convphi_stop3(i32 *%s, i32 *%d, float *%e, i32 %n) {
+; CHECK-LABEL: @convphi_stop3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast i32 [[LD]] to float
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ELSE]] ]
+; CHECK-NEXT:    store float [[PHI_TC]], float* [[E:%.*]], align 4
+; CHECK-NEXT:    ret float [[PHI_TC]]
+;
+entry:
+  %cmp15 = icmp sgt i32 %n, 0
+  br i1 %cmp15, label %then, label %else
+
+then:
+  %ls = load i32, i32* %s, align 4
+  br label %end
 
+else:
+  %ld = load i32, i32* %d, align 4
+  br label %end
+
+end:
+  %phi = phi i32 [ %ls, %then ], [ %ld, %else ]
+  %b = bitcast i32 %phi to float
+  store float %b, float* %e, align 4
+  ret float %b
+}
 
+define void @convphi_stop4(i32 *%s, i32 *%d, float *%e, i32 %n) {
+; CHECK-LABEL: @convphi_stop4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D:%.*]], align 4
+; CHECK-NEXT:    [[LD_BC:%.*]] = bitcast i32 [[LD]] to float
+; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[END:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[LS_BC:%.*]] = bitcast i32 [[LS]] to float
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI_TC:%.*]] = phi float [ [[LS_BC]], [[THEN]] ], [ [[LD_BC]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    [[BC:%.*]] = bitcast float [[PHI_TC]] to i32
+; CHECK-NEXT:    store i32 [[BC]], i32* [[S]], align 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[THEN2:%.*]], label [[END2:%.*]]
+; CHECK:       then2:
+; CHECK-NEXT:    [[LF:%.*]] = load float, float* [[E:%.*]], align 4
+; CHECK-NEXT:    br label [[END2]]
+; CHECK:       end2:
+; CHECK-NEXT:    [[PHI2:%.*]] = phi float [ [[PHI_TC]], [[END]] ], [ [[LF]], [[THEN2]] ]
+; CHECK-NEXT:    store float [[PHI2]], float* [[E]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp15 = icmp sgt i32 %n, 0
+  %ld = load i32, i32* %d, align 4
+  br i1 %cmp15, label %then, label %end
 
+then:
+  %ls = load i32, i32* %s, align 4
+  br label %end
+
+end:
+  %phi = phi i32 [ %ls, %then ], [ %ld, %entry ]
+  %phib = bitcast i32 %phi to float
+  store i32 %phi, i32* %s, align 4
+  br i1 %cmp15, label %then2, label %end2
+
+then2:
+  %lf = load float, float* %e, align 4
+  br label %end2
+
+end2:
+  %phi2 = phi float [ %phib, %end ], [ %lf, %then2 ]
+  store float %phi2, float* %e, align 4
+  ret void
+}
+
+define float @multiuse(i32 *%s, i32 *%d, i32 %n) {
+; CHECK-LABEL: @multiuse(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP15:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP15]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[LS:%.*]] = load i32, i32* [[S:%.*]], align 4
+; CHECK-NEXT:    [[A:%.*]] = add i32 [[LS]], 2
+; CHECK-NEXT:    store i32 [[A]], i32* [[D:%.*]], align 4
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[LD:%.*]] = load i32, i32* [[D]], align 4
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LS]], [[THEN]] ], [ [[LD]], [[ELSE]] ]
+; CHECK-NEXT:    [[B:%.*]] = bitcast i32 [[PHI]] to float
+; CHECK-NEXT:    ret float [[B]]
+;
+entry:
+  %cmp15 = icmp sgt i32 %n, 0
+  br i1 %cmp15, label %then, label %else
+
+then:
+  %ls = load i32, i32* %s, align 4
+  %a = add i32 %ls, 2
+  store i32 %a, i32* %d, align 4
+  br label %end
+
+else:
+  %ld = load i32, i32* %d, align 4
+  br label %end
+
+end:
+  %phi = phi i32 [ %ls, %then ], [ %ld, %else ]
+  %b = bitcast i32 %phi to float
+  ret float %b
+}