[llvm] [DAG] Teach SelectionDAGBuilder to read parameter alignment of compressstore/expandload. (PR #83763)

Yeting Kuo via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 19:07:43 PST 2024


https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/83763

>From a5c930485df1bf9150979d8a41c603bcdca44ac0 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 11:15:21 +0800
Subject: [PATCH 1/2] [DAG] Teach SelectionDAGBuilder to read parameter
 alignment of compressstore/expandload.

Previously SelectionDAGBuilder used ABI alignment for compressstore/expandload.
This patch allows SelectionDAGBuilder to use parameter alignment like memory vp
intrinsics and stills uses ABI alignment for them when they don't have alignment
attriubtes.
---
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  4 +-
 .../CodeGen/X86/masked_compressstore_isel.ll  | 21 +++++++++-
 .../CodeGen/X86/masked_expandload_isel.ll     | 42 +++++++++++++++++++
 3 files changed, 64 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/masked_expandload_isel.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 48476b0ef9705b..e0764ef731e8e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4734,7 +4734,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
     Src0 = I.getArgOperand(0);
     Ptr = I.getArgOperand(1);
     Mask = I.getArgOperand(2);
-    Alignment = std::nullopt;
+    Alignment = I.getParamAlign(1).valueOrOne();
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4898,7 +4898,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
                                  MaybeAlign &Alignment) {
     // @llvm.masked.expandload.*(Ptr, Mask, Src0)
     Ptr = I.getArgOperand(0);
-    Alignment = std::nullopt;
+    Alignment = I.getParamAlign(0).valueOrOne();
     Mask = I.getArgOperand(1);
     Src0 = I.getArgOperand(2);
   };
diff --git a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
index 0587434bff2552..2a557ac9b97b3a 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
@@ -11,7 +11,7 @@ define void @_Z3fooiPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
   ; CHECK-NEXT:   [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
   ; CHECK-NEXT:   [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
   ; CHECK-NEXT:   [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
-  ; CHECK-NEXT:   VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
+  ; CHECK-NEXT:   VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 1)
   ; CHECK-NEXT:   RET 0
 entry:
   %1 = trunc <8 x i32> %gepload to <8 x i16>
@@ -19,6 +19,25 @@ entry:
   ret void
 }
 
+
+define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
+  ; CHECK-LABEL: name: _Z3foo2iPiPs
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $ymm0, $xmm1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vr256x = COPY $ymm0
+  ; CHECK-NEXT:   [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
+  ; CHECK-NEXT:   [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
+  ; CHECK-NEXT:   [[VPMOVDWZ256rr:%[0-9]+]]:vr128x = VPMOVDWZ256rr [[COPY1]]
+  ; CHECK-NEXT:   VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed [[VPMOVW2MZ128rr]], killed [[VPMOVDWZ256rr]] :: (store unknown-size into `ptr null`, align 16)
+  ; CHECK-NEXT:   RET 0
+entry:
+  %1 = trunc <8 x i32> %gepload to <8 x i16>
+  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 16 null, <8 x i1> %0)
+  ret void
+}
+
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
 declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr nocapture, <8 x i1>) #1
 
diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
new file mode 100644
index 00000000000000..b364625a1e6f15
--- /dev/null
+++ b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s
+
+define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
+  ; CHECK-LABEL: name: _Z3fooiPiPs
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $xmm0, $xmm1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
+  ; CHECK-NEXT:   [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
+  ; CHECK-NEXT:   [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
+  ; CHECK-NEXT:   [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
+  ; CHECK-NEXT:   $xmm0 = COPY [[VPEXPANDWZ128rmk]]
+  ; CHECK-NEXT:   RET 0, $xmm0
+entry:
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %mask, <8 x i16> %src)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
+  ; CHECK-LABEL: name: _Z3foo2iPiPs
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   liveins: $xmm0, $xmm1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vr128x = COPY $xmm1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vr128x = COPY $xmm0
+  ; CHECK-NEXT:   [[VPSLLWZ128ri:%[0-9]+]]:vr128x = VPSLLWZ128ri [[COPY]], 15
+  ; CHECK-NEXT:   [[VPMOVW2MZ128rr:%[0-9]+]]:vk16wm = VPMOVW2MZ128rr killed [[VPSLLWZ128ri]]
+  ; CHECK-NEXT:   [[VPEXPANDWZ128rmk:%[0-9]+]]:vr128x = VPEXPANDWZ128rmk [[COPY1]], killed [[VPMOVW2MZ128rr]], $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
+  ; CHECK-NEXT:   $xmm0 = COPY [[VPEXPANDWZ128rmk]]
+  ; CHECK-NEXT:   RET 0, $xmm0
+entry:
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %mask, <8 x i16> %src)
+  ret <8 x i16> %res
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
+declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
+
+attributes #0 = { "target-cpu"="icelake-server" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }

>From 6322e3d0486c805d1e1c90381e7d8018a021bbef Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 5 Mar 2024 11:07:28 +0800
Subject: [PATCH 2/2] Use Align instead of MaybeAlign.

---
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 25 ++++++++-----------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e0764ef731e8e0..d6ab0cf6df8763 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4721,15 +4721,15 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
   SDLoc sdl = getCurSDLoc();
 
   auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
-                               MaybeAlign &Alignment) {
+                               Align &Alignment) {
     // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
     Src0 = I.getArgOperand(0);
     Ptr = I.getArgOperand(1);
-    Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
+    Alignment = cast<ConstantInt>(I.getArgOperand(2))->getAlignValue();
     Mask = I.getArgOperand(3);
   };
   auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
-                                    MaybeAlign &Alignment) {
+                                    Align &Alignment) {
     // llvm.masked.compressstore.*(Src0, Ptr, Mask)
     Src0 = I.getArgOperand(0);
     Ptr = I.getArgOperand(1);
@@ -4738,7 +4738,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
-  MaybeAlign Alignment;
+  Align Alignment;
   if (IsCompressing)
     getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
   else
@@ -4750,12 +4750,10 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
   SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
 
   EVT VT = Src0.getValueType();
-  if (!Alignment)
-    Alignment = DAG.getEVTAlign(VT);
 
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
-      MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
+      MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
   SDValue StoreNode =
       DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
                          ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4887,15 +4885,15 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
   SDLoc sdl = getCurSDLoc();
 
   auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
-                              MaybeAlign &Alignment) {
+                              Align &Alignment) {
     // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
     Ptr = I.getArgOperand(0);
-    Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
+    Alignment = cast<ConstantInt>(I.getArgOperand(1))->getAlignValue();
     Mask = I.getArgOperand(2);
     Src0 = I.getArgOperand(3);
   };
   auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
-                                 MaybeAlign &Alignment) {
+                                 Align &Alignment) {
     // @llvm.masked.expandload.*(Ptr, Mask, Src0)
     Ptr = I.getArgOperand(0);
     Alignment = I.getParamAlign(0).valueOrOne();
@@ -4904,7 +4902,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
-  MaybeAlign Alignment;
+  Align Alignment;
   if (IsExpanding)
     getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
   else
@@ -4916,9 +4914,6 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
   SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
 
   EVT VT = Src0.getValueType();
-  if (!Alignment)
-    Alignment = DAG.getEVTAlign(VT);
-
   AAMDNodes AAInfo = I.getAAMetadata();
   const MDNode *Ranges = getRangeMetadata(I);
 
@@ -4930,7 +4925,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
 
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
-      MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+      MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
 
   SDValue Load =
       DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,



More information about the llvm-commits mailing list