[llvm] [X86, SimplifyCFG] Support hoisting load/store with conditional faulting (Part I) (PR #96878)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 12 23:21:49 PDT 2024


================
@@ -3214,6 +3238,107 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
   BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
              std::prev(ThenBB->end()));
 
+  // If the target supports conditional faulting,
+  // we look for the following pattern:
+  // \code
+  //   BB:
+  //     ...
+  //     %cond = icmp ult %x, %y
+  //     br i1 %cond, label %TrueBB, label %FalseBB
+  //   FalseBB:
+  //     store i32 1, ptr %q, align 4
+  //     ...
+  //   TrueBB:
+  //     %maskedloadstore = load i32, ptr %b, align 4
+  //     store i32 %maskedloadstore, ptr %p, align 4
+  //     ...
+  // \endcode
+  //
+  // and transform it into:
+  //
+  // \code
+  //   BB:
+  //     ...
+  //     %cond = icmp ult %x, %y
+  //     %maskedloadstore = cload i32, ptr %b, %cond
+  //     cstore i32 %maskedloadstore, ptr %p, %cond
+  //     cstore i32 1, ptr %q, ~%cond
+  //     br i1 %cond, label %TrueBB, label %FalseBB
+  //   FalseBB:
+  //     ...
+  //   TrueBB:
+  //     ...
+  // \endcode
+  //
+  // where cload/cstore are represented by llvm.masked.load/store intrinsics,
+  // e.g.
+  //
+  // \code
+  //   %vcond = bitcast i1 %cond to <1 x i1>
+  //   %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+  //                         (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+  //   %maskedloadstore = bitcast <1 x i32> %v0 to i32
+  //   call void @llvm.masked.store.v1i32.p0
+  //                          (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+  //   %cond.not = xor i1 %cond, true
+  //   %vcond.not = bitcast i1 %cond.not to <1 x i1>
+  //   call void @llvm.masked.store.v1i32.p0
+  //              (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
+  // \endcode
+  //
+  // So we need to turn hoisted load/store into cload/cstore.
+  auto &Context = BI->getParent()->getContext();
+  auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+  auto *Cond = BI->getOperand(0);
+  Value *Mask = nullptr;
+  // Construct the condition if needed.
+  if (!SpeculatedConditionalLoadsStores.empty()) {
+    IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
+    if (Invert)
+      Mask = Builder.CreateBitCast(
+          Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
+    else
+      Mask = Builder.CreateBitCast(Cond, VCondTy);
+  }
+  for (auto *I : SpeculatedConditionalLoadsStores) {
+    IRBuilder<> Builder(I);
+    // We currently assume conditional faulting load/store is supported for
+    // scalar types only when creating new instructions. This can be easily
+    // extended for vector types in the future.
+    assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+    auto *Op0 = I->getOperand(0);
+    Instruction *MaskedLoadStore = nullptr;
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      // Handle Load.
+      auto *Ty = I->getType();
+      MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
+                                                 Op0, LI->getAlign(), Mask);
+      I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+    } else {
+      // Handle Store.
+      auto *StoredVal =
+          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+      MaskedLoadStore = Builder.CreateMaskedStore(
+          StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
----------------
dtcxzyw wrote:

> #99257 It seems the alignment should be less than 2^32 (4294967296) @dtcxzyw

It is generated by a transform pass :( BTW `opt -O3 -verify-each` doesn't complain about this.
cc @nikic
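
For reference, a minimal standalone sketch of the step this thread is attached to, written against the IRBuilder calls visible in the hunk above (the helper name and signature are made up for illustration, not taken from the patch): it only highlights that the masked store's alignment is forwarded unchanged from the original `StoreInst` into the intrinsic call.

```cpp
// Hypothetical helper (illustration only): lower a hoisted scalar store into
// @llvm.masked.store, mirroring the hunk above. The alignment is taken
// verbatim from the original StoreInst and becomes the intrinsic's alignment.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static CallInst *emitConditionalStore(IRBuilder<> &Builder, StoreInst *SI,
                                      Value *Mask /* <1 x i1> */) {
  // Wrap the scalar stored value into a single-element vector.
  Value *Val = SI->getValueOperand();
  auto *VecTy = FixedVectorType::get(Val->getType(), 1);
  Value *VecVal = Builder.CreateBitCast(Val, VecTy);
  // The original store's alignment is passed through to the masked intrinsic.
  return Builder.CreateMaskedStore(VecVal, SI->getPointerOperand(),
                                   SI->getAlign(), Mask);
}
```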



https://github.com/llvm/llvm-project/pull/96878

