[llvm] 5d940b7 - Reapply "SROA: Enhance speculateSelectInstLoads"

Wed Aug 11 20:07:09 PDT 2021

Author: Christudasan Devadasan
Date: 2021-08-11T22:58:54-04:00
New Revision: 5d940b71ae71534ab4b84285b5293dd62d4d5b35

URL: https://github.com/llvm/llvm-project/commit/5d940b71ae71534ab4b84285b5293dd62d4d5b35
DIFF: https://github.com/llvm/llvm-project/commit/5d940b71ae71534ab4b84285b5293dd62d4d5b35.diff

LOG: Reapply "SROA: Enhance speculateSelectInstLoads"

Originally committed as ffc3fb665d0a0dccd64cc8c803ad8cc1a0d5dfa1
Reverted in fcf2d5f40296be4e0f0e954001beb7814f97a212 due to an
assertion failure.

Original commit message:

Allow folding the loads of a select into a select of two loads even if
there is an intervening bitcast between the select and the load.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D106667

Added: 
    llvm/test/Transforms/SROA/select-load.ll

Modified: 
    llvm/lib/Transforms/Scalar/SROA.cpp
    llvm/test/Transforms/SROA/phi-and-select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index b3c4308ae80cc..15934f54c2137 100644

--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1330,14 +1330,21 @@ static void speculatePHINodeLoads(PHINode &PN) {
 ///   %V = select i1 %cond, i32 %V1, i32 %V2
 ///
 /// We can do this to a select if its only uses are loads and if the operand
-/// to the select can be loaded unconditionally.
+/// to the select can be loaded unconditionally. An intervening bitcast is also
+/// allowed, provided its single use is such a load.
 static bool isSafeSelectToSpeculate(SelectInst &SI) {
   Value *TValue = SI.getTrueValue();
   Value *FValue = SI.getFalseValue();
   const DataLayout &DL = SI.getModule()->getDataLayout();
 
   for (User *U : SI.users()) {
-    LoadInst *LI = dyn_cast<LoadInst>(U);
+    LoadInst *LI;
+    BitCastInst *BC = dyn_cast<BitCastInst>(U);
+    if (BC && BC->hasOneUse())
+      LI = dyn_cast<LoadInst>(*BC->user_begin());
+    else
+      LI = dyn_cast<LoadInst>(U);
+
     if (!LI || !LI->isSimple())
       return false;
 
@@ -1363,13 +1370,27 @@ static void speculateSelectInstLoads(SelectInst &SI) {
   Value *FV = SI.getFalseValue();
   // Replace the loads of the select with a select of two loads.
   while (!SI.use_empty()) {
-    LoadInst *LI = cast<LoadInst>(SI.user_back());
+    LoadInst *LI;
+    BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back());
+    if (BC) {
+      assert(BC->hasOneUse() && "Bitcast should have a single use.");
+      LI = cast<LoadInst>(BC->user_back());
+    } else {
+      LI = cast<LoadInst>(SI.user_back());
+    }
+
     assert(LI->isSimple() && "We only speculate simple loads");
 
     IRB.SetInsertPoint(LI);
-    LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
+    Value *NewTV =
+        BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast")
+           : TV;
+    Value *NewFV =
+        BC ? IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast")
+           : FV;
+    LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV,
                                   LI->getName() + ".sroa.speculate.load.true");
-    LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
+    LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV,
                                   LI->getName() + ".sroa.speculate.load.false");
     NumLoadsSpeculated += 2;
 
@@ -1390,6 +1411,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
     LLVM_DEBUG(dbgs() << "          speculated to: " << *V << "\n");
     LI->replaceAllUsesWith(V);
     LI->eraseFromParent();
+    if (BC)
+      BC->eraseFromParent();
   }
   SI.eraseFromParent();
 }

diff  --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll
index c773714981359..d1863359b1630 100644
--- a/llvm/test/Transforms/SROA/phi-and-select.ll
+++ b/llvm/test/Transforms/SROA/phi-and-select.ll
@@ -60,23 +60,14 @@ entry:
   ret i32 %result
 }
 
-; If bitcast isn't considered a safe phi/select use, the alloca
-; remains as an array.
-; FIXME: Why isn't this identical to test2?
 define float @test2_bitcast() {
 ; CHECK-LABEL: @test2_bitcast(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[A_SROA_3:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[A_SROA_0]], align 4
-; CHECK-NEXT:    store i32 1, i32* [[A_SROA_3]], align 4
-; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_V0:%.*]] = load i32, i32* [[A_SROA_0]], align 4
-; CHECK-NEXT:    [[A_SROA_3_0_A_SROA_3_4_V1:%.*]] = load i32, i32* [[A_SROA_3]], align 4
-; CHECK-NEXT:    [[COND:%.*]] = icmp sle i32 [[A_SROA_0_0_A_SROA_0_0_V0]], [[A_SROA_3_0_A_SROA_3_4_V1]]
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], i32* [[A_SROA_3]], i32* [[A_SROA_0]]
-; CHECK-NEXT:    [[SELECT_BC:%.*]] = bitcast i32* [[SELECT]] to float*
-; CHECK-NEXT:    [[RESULT:%.*]] = load float, float* [[SELECT_BC]], align 4
-; CHECK-NEXT:    ret float [[RESULT]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp sle i32 0, 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 1 to float
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 0 to float
+; CHECK-NEXT:    [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], float [[TMP0]], float [[TMP1]]
+; CHECK-NEXT:    ret float [[RESULT_SROA_SPECULATED]]
 ;
 entry:
   %a = alloca [2 x i32]

diff  --git a/llvm/test/Transforms/SROA/select-load.ll b/llvm/test/Transforms/SROA/select-load.ll
new file mode 100644
index 0000000000000..680cfd3bd4944
--- /dev/null
+++ b/llvm/test/Transforms/SROA/select-load.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -sroa < %s | FileCheck %s
+
+%st.half = type { half }
+
+; Allow speculateSelectInstLoads to fold load and select
+; even if there is an intervening bitcast.
+define <2 x i16> @test_load_bitcast_select(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @test_load_bitcast_select(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half 0xHFFFF to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half 0xH0000 to i16
+; CHECK-NEXT:    [[LD1_SROA_SPECULATED:%.*]] = select i1 [[COND1:%.*]], i16 [[TMP0]], i16 [[TMP1]]
+; CHECK-NEXT:    [[V1:%.*]] = insertelement <2 x i16> undef, i16 [[LD1_SROA_SPECULATED]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast half 0xHFFFF to i16
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half 0xH0000 to i16
+; CHECK-NEXT:    [[LD2_SROA_SPECULATED:%.*]] = select i1 [[COND2:%.*]], i16 [[TMP2]], i16 [[TMP3]]
+; CHECK-NEXT:    [[V2:%.*]] = insertelement <2 x i16> [[V1]], i16 [[LD2_SROA_SPECULATED]], i32 1
+; CHECK-NEXT:    ret <2 x i16> [[V2]]
+;
+entry:
+  %true = alloca half, align 2
+  %false = alloca half, align 2
+  store half 0xHFFFF, half* %true, align 2
+  store half 0xH0000, half* %false, align 2
+  %false.cast = bitcast half* %false to %st.half*
+  %true.cast = bitcast half* %true to %st.half*
+  %sel1 = select i1 %cond1, %st.half* %true.cast, %st.half* %false.cast
+  %cast1 = bitcast %st.half* %sel1 to i16*
+  %ld1 = load i16, i16* %cast1, align 2
+  %v1 = insertelement <2 x i16> undef, i16 %ld1, i32 0
+  %sel2 = select i1 %cond2, %st.half* %true.cast, %st.half* %false.cast
+  %cast2 = bitcast %st.half* %sel2 to i16*
+  %ld2 = load i16, i16* %cast2, align 2
+  %v2 = insertelement <2 x i16> %v1, i16 %ld2, i32 1
+  ret <2 x i16> %v2
+}
+
+%st.args = type { i32, i32* }
+
+; A bitcasted load and a direct load of select.
+define void @test_multiple_loads_select(i1 %cmp){
+; CHECK-LABEL: @test_multiple_loads_select(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* undef to i8*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* undef to i8*
+; CHECK-NEXT:    [[ADDR_I8_SROA_SPECULATED:%.*]] = select i1 [[CMP:%.*]], i8* [[TMP0]], i8* [[TMP1]]
+; CHECK-NEXT:    call void @foo_i8(i8* [[ADDR_I8_SROA_SPECULATED]])
+; CHECK-NEXT:    [[ADDR_I32_SROA_SPECULATED:%.*]] = select i1 [[CMP]], i32* undef, i32* undef
+; CHECK-NEXT:    call void @foo_i32(i32* [[ADDR_I32_SROA_SPECULATED]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %args = alloca [2 x %st.args], align 16
+  %arr0 = getelementptr inbounds [2 x %st.args], [2 x %st.args]* %args, i64 0, i64 0
+  %arr1 = getelementptr inbounds [2 x %st.args], [2 x %st.args]* %args, i64 0, i64 1
+  %sel = select i1 %cmp, %st.args* %arr1, %st.args* %arr0
+  %addr = getelementptr inbounds %st.args, %st.args* %sel, i64 0, i32 1
+  %bcast.i8 = bitcast i32** %addr to i8**
+  %addr.i8 = load i8*, i8** %bcast.i8, align 8
+  call void @foo_i8(i8* %addr.i8)
+  %addr.i32 = load i32*, i32** %addr, align 8
+  call void @foo_i32 (i32* %addr.i32)
+  ret void
+}
+
+declare void @foo_i8(i8*)
+declare void @foo_i32(i32*)