[llvm] [X86][APX] Do optimizeMemoryInst for v1X masked load/store (PR #151331)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 30 19:50:46 PDT 2025


https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/151331

>From bf50bed5b55043f5fb7dfc16f3d59192c84b9d81 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Wed, 30 Jul 2025 21:12:43 +0800
Subject: [PATCH 1/2] [X86][APX] Do optimizeMemoryInst for v1X masked
 load/store

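Fix a redundant LEA (https://godbolt.org/z/hrP1eox4Y): run optimizeMemoryInst
on single-element (<1 x X>) masked loads and stores so that the address
computation can be folded into the memory operand.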
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 23 +++++++++++++++++++++++
 llvm/test/CodeGen/X86/apx/cf.ll     | 19 +++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 416c56d5a36f8..f16283be1b996 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2769,6 +2769,29 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
       return optimizeGatherScatterInst(II, II->getArgOperand(0));
     case Intrinsic::masked_scatter:
       return optimizeGatherScatterInst(II, II->getArgOperand(1));
+    case Intrinsic::masked_load:
+      // Treat a <1 x X> masked load as a scalar load of type X.
+      if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
+        if (VT->getNumElements() == 1) {
+          Value *PtrVal = II->getArgOperand(0);
+          unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+          if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+            return true;
+        }
+      }
+      return false;
+    case Intrinsic::masked_store:
+      // Treat a <1 x X> masked store as a scalar store of type X.
+      if (auto *VT =
+              dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+        if (VT->getNumElements() == 1) {
+          Value *PtrVal = II->getArgOperand(1);
+          unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+          if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
+            return true;
+        }
+      }
+      return false;
     }
 
     SmallVector<Value *, 2> PtrOps;
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index b111ae542d93a..8c9869207f775 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -194,3 +194,22 @@ entry:
   call void @llvm.masked.store.v1i64.p0(<1 x i64> %3, ptr %p, i32 4, <1 x i1> %0)
   ret void
 }
+
+define void @sink_gep(ptr %p, i1 %cond) {
+; CHECK-LABEL: sink_gep:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    cfcmovnel %eax, 112(%rdi)
+; CHECK-NEXT:    movl $0, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %0 = getelementptr i8, ptr %p, i64 112
+  br label %next
+
+next:
+  %1 = bitcast i1 %cond to <1 x i1>
+  call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %0, i32 1, <1 x i1> %1)
+  store i32 0, ptr %p, align 4
+  ret void
+}

>From f51c6bdb95785819ea41409c9ff062411b91fc70 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Thu, 31 Jul 2025 10:50:22 +0800
Subject: [PATCH 2/2] Add load

---
 llvm/test/CodeGen/X86/apx/cf.ll | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index 8c9869207f775..c97ec38aaff01 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -201,7 +201,8 @@ define void @sink_gep(ptr %p, i1 %cond) {
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb $1, %sil
 ; CHECK-NEXT:    cfcmovnel %eax, 112(%rdi)
-; CHECK-NEXT:    movl $0, (%rdi)
+; CHECK-NEXT:    cfcmovnel 112(%rdi), %eax
+; CHECK-NEXT:    movl %eax, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
   %0 = getelementptr i8, ptr %p, i64 112
@@ -210,6 +211,7 @@ entry:
 next:
   %1 = bitcast i1 %cond to <1 x i1>
   call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %0, i32 1, <1 x i1> %1)
-  store i32 0, ptr %p, align 4
+  %2 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %0, i32 1, <1 x i1> %1, <1 x i32> zeroinitializer)
+  store <1 x i32> %2, ptr %p, align 4
   ret void
 }



More information about the llvm-commits mailing list