[llvm] [X86,SimplifyCFG] Support conditional faulting load or store only (PR #132032)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 06:56:24 PDT 2025


llvmbot wrote:


@llvm/pr-subscribers-llvm-transforms

Author: Phoebe Wang (phoebewang)

Changes:

This fixes a bug when a target supports only conditional faulting loads; see the test case hoist_store_without_cstore.

Add the option `disable-cload-cstore` to emulate the load-only and store-only configurations.
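
For quick reference, the new flag is a bit mask. The sketch below enumerates its values (taken from the cl::desc and the checks in isSafeCheapLoadStore) and shows an invocation mirroring the RUN lines added to the test; the input file name is a placeholder:

```llvm
; Values of -disable-cload-cstore (bit mask, default 0):
;   0 - conditional faulting loads and stores both allowed
;   1 - conditional faulting loads disabled  (bit 0, checked for LoadInst)
;   2 - conditional faulting stores disabled (bit 1, checked for StoreInst)
;   3 - both disabled
;
; Example invocation ("input.ll" is a placeholder):
;   opt < input.ll -mtriple=x86_64 -mattr=+cf \
;     -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' \
;     -simplifycfg-require-and-preserve-domtree=1 \
;     -disable-cload-cstore=1 -S
```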

---

Patch is 60.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132032.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+24-9) 
- (modified) llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll (+748-166) 


``````````diff
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 17c5fa6503f20..8a8baf9073f48 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -132,6 +132,11 @@ static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
              "to speculatively execute to eliminate conditional branch "
              "(default = 6)"));
 
+static cl::opt<unsigned> DisableCloadCstore(
+    "disable-cload-cstore", cl::Hidden, cl::init(0),
+    cl::desc("Control to disable cond-faulting-load(1)/cond-faulting-store(2)"
+             "/all(3)"));
+
 static cl::opt<unsigned>
     HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
                          cl::init(20),
@@ -1682,22 +1687,22 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
 static void hoistConditionalLoadsStores(
     BranchInst *BI,
     SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
-    std::optional<bool> Invert) {
+    std::optional<bool> Invert, Instruction *Sel) {
   auto &Context = BI->getParent()->getContext();
   auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
   auto *Cond = BI->getOperand(0);
   // Construct the condition if needed.
   BasicBlock *BB = BI->getParent();
-  IRBuilder<> Builder(
-      Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
   Value *Mask = nullptr;
   Value *MaskFalse = nullptr;
   Value *MaskTrue = nullptr;
   if (Invert.has_value()) {
+    IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
     Mask = Builder.CreateBitCast(
         *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
         VCondTy);
   } else {
+    IRBuilder<> Builder(BI);
     MaskFalse = Builder.CreateBitCast(
         Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
     MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
@@ -1723,13 +1728,20 @@ static void hoistConditionalLoadsStores(
       PHINode *PN = nullptr;
       Value *PassThru = nullptr;
       if (Invert.has_value())
-        for (User *U : I->users())
+        for (User *U : I->users()) {
           if ((PN = dyn_cast<PHINode>(U))) {
             PassThru = Builder.CreateBitCast(
                 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
                 FixedVectorType::get(Ty, 1));
-            break;
+          } else if (auto *Ins = cast<Instruction>(U);
+                     Sel && Ins->getParent() == BB) {
+            // This happens when the store and/or a speculated instruction
+            // between the load and store was hoisted into BB. Make sure the
+            // masked load is inserted before its use.
+            // We assume there is only one such use.
+            Builder.SetInsertPoint(Ins);
           }
+        }
       MaskedLoadStore = Builder.CreateMaskedLoad(
           FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
       Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
@@ -1769,10 +1781,10 @@ static bool isSafeCheapLoadStore(const Instruction *I,
                                  const TargetTransformInfo &TTI) {
   // Not handle volatile or atomic.
   if (auto *L = dyn_cast<LoadInst>(I)) {
-    if (!L->isSimple())
+    if (!L->isSimple() || (DisableCloadCstore & 1))
       return false;
   } else if (auto *S = dyn_cast<StoreInst>(I)) {
-    if (!S->isSimple())
+    if (!S->isSimple() || (DisableCloadCstore & 2))
       return false;
   } else
     return false;
@@ -3308,6 +3320,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   // If we get here, we can hoist the instruction and if-convert.
   LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
 
+  Instruction *Sel = nullptr;
   // Insert a select of the value of the speculated store.
   if (SpeculatedStoreValue) {
     IRBuilder<NoFolder> Builder(BI);
@@ -3318,6 +3331,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
       std::swap(TrueV, FalseV);
     Value *S = Builder.CreateSelect(
         BrCond, TrueV, FalseV, "spec.store.select", BI);
+    Sel = cast<Instruction>(S);
     SpeculatedStore->setOperand(0, S);
     SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
                                          SpeculatedStore->getDebugLoc());
@@ -3390,7 +3404,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
              std::prev(ThenBB->end()));
 
   if (!SpeculatedConditionalLoadsStores.empty())
-    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
+    hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
+                                Sel);
 
   // Insert selects and rewrite the PHI operands.
   IRBuilder<NoFolder> Builder(BI);
@@ -8042,7 +8057,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
 
         if (CanSpeculateConditionalLoadsStores()) {
           hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
-                                      std::nullopt);
+                                      std::nullopt, nullptr);
           return requestResimplify();
         }
       }
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 5c9058b482320..eacabde9f3d61 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -1,24 +1,42 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefixes=CHECK,LOADSTORE
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=0 -S | FileCheck %s --check-prefixes=CHECK,LOADSTORE
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=1 -S | FileCheck %s --check-prefixes=CHECK,NONE,STOREONLY
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=2 -S | FileCheck %s --check-prefixes=CHECK,NONE,LOADONLY
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=3 -S | FileCheck %s --check-prefixes=NONE,NONEONLY
 
 ;; Basic case: check masked.load/store is generated for i16/i32/i64.
 define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
-; CHECK-LABEL: @basic(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT:    [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    ret void
+; LOADSTORE-LABEL: @basic(
+; LOADSTORE-NEXT:  entry:
+; LOADSTORE-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; LOADSTORE-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; LOADSTORE-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADSTORE-NEXT:    [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; LOADSTORE-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
+; LOADSTORE-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; LOADSTORE-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT:    [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; LOADSTORE-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT:    [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
+; LOADSTORE-NEXT:    call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT:    ret void
+;
+; NONE-LABEL: @basic(
+; NONE-NEXT:  entry:
+; NONE-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_END:%.*]]
+; NONE:       if.true:
+; NONE-NEXT:    [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
+; NONE-NEXT:    [[TMP1:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONE-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
+; NONE-NEXT:    store i16 [[TMP0]], ptr [[B]], align 2
+; NONE-NEXT:    store i32 [[TMP1]], ptr [[P]], align 4
+; NONE-NEXT:    store i64 [[TMP2]], ptr [[Q]], align 8
+; NONE-NEXT:    br label [[IF_END]]
+; NONE:       if.end:
+; NONE-NEXT:    ret void
 ;
 entry:
   br i1 %cond, label %if.true, label %if.false
@@ -41,16 +59,27 @@ if.end:
 
 ;; Successor 1 branches to successor 0.
 define void @succ1to0(ptr %p, ptr %q, i32 %a) {
-; CHECK-LABEL: @succ1to0(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT:    ret void
+; LOADSTORE-LABEL: @succ1to0(
+; LOADSTORE-NEXT:  entry:
+; LOADSTORE-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; LOADSTORE-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; LOADSTORE-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; LOADSTORE-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
+; LOADSTORE-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; LOADSTORE-NEXT:    ret void
+;
+; NONE-LABEL: @succ1to0(
+; NONE-NEXT:  entry:
+; NONE-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; NONE-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; NONE:       if.end:
+; NONE-NEXT:    ret void
+; NONE:       if.then:
+; NONE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONE-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT:    br label [[IF_END]]
 ;
 entry:
   %tobool = icmp ne i32 %a, 0
@@ -67,14 +96,45 @@ if.then:
 
 ;; Successor 1 branches to successor 0 and there is a phi node.
 define i32 @succ1to0_phi(ptr %p)  {
-; CHECK-LABEL: @succ1to0_phi(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
-; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT:    ret i32 [[TMP3]]
+; LOADSTORE-LABEL: @succ1to0_phi(
+; LOADSTORE-NEXT:  entry:
+; LOADSTORE-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADSTORE-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADSTORE-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADSTORE-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT:    ret i32 [[TMP3]]
+;
+; STOREONLY-LABEL: @succ1to0_phi(
+; STOREONLY-NEXT:  entry:
+; STOREONLY-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; STOREONLY-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; STOREONLY:       if.false:
+; STOREONLY-NEXT:    [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; STOREONLY-NEXT:    br label [[IF_TRUE]]
+; STOREONLY:       if.true:
+; STOREONLY-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; STOREONLY-NEXT:    ret i32 [[RES]]
+;
+; LOADONLY-LABEL: @succ1to0_phi(
+; LOADONLY-NEXT:  entry:
+; LOADONLY-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADONLY-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADONLY-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADONLY-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADONLY-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADONLY-NEXT:    ret i32 [[TMP3]]
+;
+; NONEONLY-LABEL: @succ1to0_phi(
+; NONEONLY-NEXT:  entry:
+; NONEONLY-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; NONEONLY-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY:       if.false:
+; NONEONLY-NEXT:    [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; NONEONLY-NEXT:    br label [[IF_TRUE]]
+; NONEONLY:       if.true:
+; NONEONLY-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONEONLY-NEXT:    ret i32 [[RES]]
 ;
 entry:
   %cond = icmp eq ptr %p, null
@@ -91,16 +151,28 @@ if.true:
 
 ;; Successor 0 branches to successor 1.
 define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
-; CHECK-LABEL: @succ0to1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT:    ret void
+; LOADSTORE-LABEL: @succ0to1(
+; LOADSTORE-NEXT:  entry:
+; LOADSTORE-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT:    [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; LOADSTORE-NEXT:    [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; LOADSTORE-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; LOADSTORE-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT:    ret void
+;
+; NONE-LABEL: @succ0to1(
+; NONE-NEXT:  entry:
+; NONE-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONE-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONE:       if.false:
+; NONE-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
+; NONE-NEXT:    ret void
+; NONE:       if.true:
+; NONE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; NONE-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT:    br label [[IF_FALSE]]
 ;
 entry:
   %cond = icmp eq i32 %a, 0
@@ -121,16 +193,29 @@ if.end:
 
 ;; Load after store can be hoisted.
 define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
-; CHECK-LABEL: @load_after_store(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
-; CHECK-NEXT:    ret i64 [[SPEC_SELECT]]
+; LOADSTORE-LABEL: @load_after_store(
+; LOADSTORE-NEXT:  entry:
+; LOADSTORE-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT:    [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
+; LOADSTORE-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
+; LOADSTORE-NEXT:    ret i64 [[SPEC_SELECT]]
+;
+; NONE-LABEL: @load_after_store(
+; NONE-NEXT:  entry:
+; NONE-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONE-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[COMMON_RET:%.*]]
+; NONE:       common.ret:
+; NONE-NEXT:    [[COMMON_RET_OP:%.*]] = phi i64 [ [[ZEXT:%.*]], [[IF_TRUE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONE-NEXT:    ret i64 [[COMMON_RET_OP]]
+; NONE:       if.true:
+; NONE-NEXT:    store i32 1, ptr [[B:%.*]], align 4
+; NONE-NEXT:    [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
+; NONE-NEXT:    [[ZEXT]] = zext i16 [[TMP0]] to i64
+; NONE-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
   %cond = icmp eq i32 %a, 0
@@ -148,15 +233,49 @@ if.end:
 
 ;; Speculatable memory read doesn't prevent the hoist.
 define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
-; CHECK-LABEL: @load_skip_speculatable_memory_read(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[READ:%.*]] = call i32 @read_memory_only()
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT:    ret void
+; LOADSTORE-LABEL: @load_skip_speculatable_memory_read(
+; LOADSTORE-NEXT:  entry:
+; LOADSTORE-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT:    [[READ:%.*]] = call i32 @read_memory_only()
+; LOADSTORE-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT:    [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
+; LOADSTORE-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT:    ret void
+;
+; STOREONLY-LABEL: @load_skip_speculatable_memory_read(
+; STOREONLY-NEXT:  entry:
+; STOREONLY-NEXT:    [[COND...
[truncated]

``````````
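
To make the fix concrete, below is a minimal hand-written sketch of the intended output shape when only conditional faulting loads are available (e.g. with -disable-cload-cstore=2). It is not taken from the patch: the function name, value names, and the %old load are invented for self-containment (in the real transform the select's false arm comes from the value of the store being speculated against). What it illustrates is the ordering constraint the new Sel parameter enforces: the masked load must be created before the spec.store.select that consumes it.

```llvm
declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>)

define void @sketch(i1 %cond, ptr %p, ptr %q) {
entry:
  ; Previous value of *q; the unconditional store below then becomes a
  ; no-op when %cond is false.
  %old = load i32, ptr %q, align 4
  %mask = bitcast i1 %cond to <1 x i1>
  ; The masked load is materialized *before* the select that uses %v.
  ; Before this patch it could be inserted at the position of the last
  ; speculated conditional load/store, i.e. after its use.
  %v.vec = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %p, i32 4, <1 x i1> %mask, <1 x i32> poison)
  %v = bitcast <1 x i32> %v.vec to i32
  %spec.store.select = select i1 %cond, i32 %v, i32 %old
  store i32 %spec.store.select, ptr %q, align 4
  ret void
}
```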



https://github.com/llvm/llvm-project/pull/132032

