[llvm] [X86,SimplifyCFG] Use passthru to reduce select (PR #108754)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 15 19:57:44 PDT 2024


https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/108754

>From 0381a076d6a7cd7da83d98f8afb4f5ab957c97ae Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Sun, 15 Sep 2024 21:04:07 +0800
Subject: [PATCH 1/2] [X86,SimplifyCFG] Use passthru to reduce select

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp       | 17 ++++++++++++++---
 .../X86/hoist-loads-stores-with-cf.ll           | 10 ++++------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f9db996cdc3583..5cebfbadf22069 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3335,12 +3335,21 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
     assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
     auto *Op0 = I->getOperand(0);
     Instruction *MaskedLoadStore = nullptr;
+    PHINode *PN = nullptr;
     if (auto *LI = dyn_cast<LoadInst>(I)) {
       // Handle Load.
       auto *Ty = I->getType();
-      MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
-                                                 Op0, LI->getAlign(), Mask);
-      I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+      Value *PassThru = nullptr;
+      if (I->hasOneUse())
+        if ((PN = dyn_cast<PHINode>(I->use_begin()->getUser())))
+          PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
+                                           FixedVectorType::get(Ty, 1));
+      MaskedLoadStore = Builder.CreateMaskedLoad(
+          FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
+      if (PN)
+        PN->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+      else
+        I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
     } else {
       // Handle Store.
       auto *StoredVal =
@@ -3365,6 +3374,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
       return Node->getMetadataID() == Metadata::DIAssignIDKind;
     });
     MaskedLoadStore->copyMetadata(*I);
+    if (PN)
+      PN->eraseFromParent();
     I->eraseFromParent();
   }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 047ca717da8009..760334dc1d2815 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -72,10 +72,9 @@ define i32 @succ1to0_phi(ptr %p)  {
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
 ; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP3]]
-; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
 entry:
   %cond = icmp eq ptr %p, null
@@ -184,10 +183,9 @@ define i32 @load_from_gep(ptr %p)  {
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
 ; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP3]]
-; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
 entry:
   %cond = icmp eq ptr %p, null

>From afc77193fd16599ef3a7425ef0a948bd4d55c521 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 16 Sep 2024 10:57:27 +0800
Subject: [PATCH 2/2] Address review comments

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 12 ++++----
 .../X86/hoist-loads-stores-with-cf.ll         | 30 +++++++++++++++++++
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 5cebfbadf22069..29c1f24fb5aafe 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3040,7 +3040,7 @@ static bool isSafeCheapLoadStore(const Instruction *I,
 ///     %sub = sub %x, %y
 ///     br label BB2
 ///   EndBB:
-///     %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
+///     %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
 ///     ...
 /// \endcode
 ///
@@ -3335,10 +3335,10 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
     assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
     auto *Op0 = I->getOperand(0);
     Instruction *MaskedLoadStore = nullptr;
-    PHINode *PN = nullptr;
     if (auto *LI = dyn_cast<LoadInst>(I)) {
       // Handle Load.
       auto *Ty = I->getType();
+      PHINode *PN = nullptr;
       Value *PassThru = nullptr;
       if (I->hasOneUse())
         if ((PN = dyn_cast<PHINode>(I->use_begin()->getUser())))
@@ -3346,10 +3346,10 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
                                            FixedVectorType::get(Ty, 1));
       MaskedLoadStore = Builder.CreateMaskedLoad(
           FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
+      Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
       if (PN)
-        PN->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
-      else
-        I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+        PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
+      I->replaceAllUsesWith(NewLoadStore);
     } else {
       // Handle Store.
       auto *StoredVal =
@@ -3374,8 +3374,6 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
       return Node->getMetadataID() == Metadata::DIAssignIDKind;
     });
     MaskedLoadStore->copyMetadata(*I);
-    if (PN)
-      PN->eraseFromParent();
     I->eraseFromParent();
   }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 760334dc1d2815..8d368640f40f81 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -672,6 +672,36 @@ if.false:
   ret void
 }
 
+define i32 @str_transcode0(i1 %cond1, ptr %p, i1 %cond2) {
+; CHECK-LABEL: @str_transcode0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND2:%.*]] to <1 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[P:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[Y:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB1]] ]
+; CHECK-NEXT:    store i64 [[Y]], ptr [[P]], align 8
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br i1 %cond1, label %bb3, label %bb1
+
+bb1:                                                ; preds = %entry
+  br i1 %cond2, label %bb2, label %bb3
+
+bb2:                                                ; preds = %bb1
+  %x = load i64, ptr %p, align 8
+  br label %bb3
+
+bb3:                                                ; preds = %bb2, %bb1, %entry
+  %y = phi i64 [ %x, %bb2 ], [ 0, %bb1 ], [ 0, %entry ]
+  store i64 %y, ptr %p, align 8
+  ret i32 0
+}
+
 declare i32 @read_memory_only() readonly nounwind willreturn speculatable
 
 !llvm.dbg.cu = !{!0}



More information about the llvm-commits mailing list