[llvm] [DAG] Teach SelectionDAGBuilder to read parameter alignment of compressstore/expandload. (PR #83763)

Yeting Kuo via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 01:02:42 PST 2024


https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/83763

>From 177fbeecd758e6e9e2f391568969af607687ef4b Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 11:15:21 +0800
Subject: [PATCH 1/5] [DAG] Teach SelectionDAGBuilder to read parameter
 alignment of compressstore/expandload.

Previously SelectionDAGBuilder used ABI alignment for compressstore/expandload.
This patch allows SelectionDAGBuilder to use parameter alignment like memory vp
intrinsics and still uses ABI alignment for them when they don't have alignment
attributes.
---
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  4 +++
 .../CodeGen/X86/masked_compressstore_isel.ll  | 18 +++++++++-
 .../CodeGen/X86/masked_expandload_isel.ll     | 33 +++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/masked_expandload_isel.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ab2f42d2024ccc..39628f4fb3689d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4582,6 +4582,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
     Ptr = I.getArgOperand(1);
     Mask = I.getArgOperand(2);
     Alignment = std::nullopt;
+    if (MaybeAlign Align = I.getParamAlign(1))
+      Alignment = Align;
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4746,6 +4748,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
     // @llvm.masked.expandload.*(Ptr, Mask, Src0)
     Ptr = I.getArgOperand(0);
     Alignment = std::nullopt;
+    if (MaybeAlign Align = I.getParamAlign(0))
+      Alignment = Align;
     Mask = I.getArgOperand(1);
     Src0 = I.getArgOperand(2);
   };
diff --git a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
index 1851a21c8c0641..b5857b22382da0 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
@@ -7,7 +7,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: bb.0.entry:
+; CHECK-LABEL:   name: _Z3fooiPiPs
 ; CHECK:         %1:vr128x = COPY $xmm1
 ; CHECK-NEXT:    %0:vr256x = COPY $ymm0
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
@@ -16,6 +16,22 @@ entry:
 ; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 16)
 ; CHECK-NEXT:    RET 0
 
+define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
+entry:
+  %1 = trunc <8 x i32> %gepload to <8 x i16>
+  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 32 null, <8 x i1> %0)
+  ret void
+}
+
+; CHECK-LABEL:   name: _Z3foo2iPiPs
+; CHECK:         %1:vr128x = COPY $xmm1
+; CHECK-NEXT:    %0:vr256x = COPY $ymm0
+; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT:    %4:vr128x = VPMOVDWZ256rr %0
+; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 32)
+; CHECK-NEXT:    RET 0
+
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
 declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr nocapture, <8 x i1>) #1
 
diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
new file mode 100644
index 00000000000000..7f20e03d343cdb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s
+
+define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
+entry:
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %0, <8 x i16> %src)
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL:   name: _Z3fooiPiPs
+; CHECK:         %1:vr128x = COPY $xmm1
+; CHECK-NEXT:    %0:vr128x = COPY $xmm0
+; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
+
+define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
+entry:
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 32 null, <8 x i1> %0, <8 x i16> %src)
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL:   name: _Z3foo2iPiPs
+; CHECK:         %1:vr128x = COPY $xmm1
+; CHECK-NEXT:    %0:vr128x = COPY $xmm0
+; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 32)
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
+declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
+
+attributes #0 = { "target-cpu"="icelake-server" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }

>From e52aca0016eb5926b6904a1272bbc4e4ec81e9f4 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 13:46:02 +0800
Subject: [PATCH 2/5] Simplified code.

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 39628f4fb3689d..96d0f139c0a672 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4581,9 +4581,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
     Src0 = I.getArgOperand(0);
     Ptr = I.getArgOperand(1);
     Mask = I.getArgOperand(2);
-    Alignment = std::nullopt;
-    if (MaybeAlign Align = I.getParamAlign(1))
-      Alignment = Align;
+    Alignment = I.getParamAlign(1);
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4747,9 +4745,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
                                  MaybeAlign &Alignment) {
     // @llvm.masked.expandload.*(Ptr, Mask, Src0)
     Ptr = I.getArgOperand(0);
-    Alignment = std::nullopt;
-    if (MaybeAlign Align = I.getParamAlign(0))
-      Alignment = Align;
+    Alignment = I.getParamAlign(0);
     Mask = I.getArgOperand(1);
     Src0 = I.getArgOperand(2);
   };

>From de079e6cc331730717c76188b658c85c96aac0bc Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 15:56:37 +0800
Subject: [PATCH 3/5] Use alignment 1 by default.

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 ++--
 llvm/test/CodeGen/X86/masked_compressstore_isel.ll    | 6 +++---
 llvm/test/CodeGen/X86/masked_expandload_isel.ll       | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 96d0f139c0a672..612402581da3c2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4581,7 +4581,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
     Src0 = I.getArgOperand(0);
     Ptr = I.getArgOperand(1);
     Mask = I.getArgOperand(2);
-    Alignment = I.getParamAlign(1);
+    Alignment = I.getParamAlign(1).valueOrOne();
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4745,7 +4745,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
                                  MaybeAlign &Alignment) {
     // @llvm.masked.expandload.*(Ptr, Mask, Src0)
     Ptr = I.getArgOperand(0);
-    Alignment = I.getParamAlign(0);
+    Alignment = I.getParamAlign(0).valueOrOne();
     Mask = I.getArgOperand(1);
     Src0 = I.getArgOperand(2);
   };
diff --git a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
index b5857b22382da0..fb206d3412081e 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
@@ -13,13 +13,13 @@ entry:
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
 ; CHECK-NEXT:    %4:vr128x = VPMOVDWZ256rr %0
-; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 16)
+; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 1)
 ; CHECK-NEXT:    RET 0
 
 define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
 entry:
   %1 = trunc <8 x i32> %gepload to <8 x i16>
-  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 32 null, <8 x i1> %0)
+  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 16 null, <8 x i1> %0)
   ret void
 }
 
@@ -29,7 +29,7 @@ entry:
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
 ; CHECK-NEXT:    %4:vr128x = VPMOVDWZ256rr %0
-; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 32)
+; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 16)
 ; CHECK-NEXT:    RET 0
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
index 7f20e03d343cdb..65116a75ef8529 100644
--- a/llvm/test/CodeGen/X86/masked_expandload_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
@@ -11,11 +11,11 @@ entry:
 ; CHECK-NEXT:    %0:vr128x = COPY $xmm0
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
-; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
+; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
 
 define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
 entry:
-  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 32 null, <8 x i1> %0, <8 x i16> %src)
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %0, <8 x i16> %src)
   ret <8 x i16> %res
 }
 
@@ -24,7 +24,7 @@ entry:
 ; CHECK-NEXT:    %0:vr128x = COPY $xmm0
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
-; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 32)
+; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
 declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)

>From 9d3a50b673f15a87a1cbc85e9a5f0716a2a58b33 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 17:01:44 +0800
Subject: [PATCH 4/5] Update naming.

---
 llvm/test/CodeGen/X86/masked_expandload_isel.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
index 65116a75ef8529..5542c8227d9d2b 100644
--- a/llvm/test/CodeGen/X86/masked_expandload_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s
 
-define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
+define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
 entry:
-  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %0, <8 x i16> %src)
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %mask, <8 x i16> %src)
   ret <8 x i16> %res
 }
 
@@ -13,9 +13,9 @@ entry:
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
 ; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
 
-define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
+define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %mask) #0 {
 entry:
-  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %0, <8 x i16> %src)
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %mask, <8 x i16> %src)
   ret <8 x i16> %res
 }
 

>From c3b7ee3bd8ed66a23873ae34854058ab8cecbf91 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 17:02:03 +0800
Subject: [PATCH 5/5] Update LangRef.rst

---
 llvm/docs/LangRef.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index f56d4ed28f2855..a2d1f816665250 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -24276,6 +24276,9 @@ Arguments:
 
 The first operand is the base pointer for the load. It has the same underlying type as the element of the returned vector. The second operand, mask, is a vector of boolean values with the same number of elements as the return type. The third is a pass-through value that is used to fill the masked-off lanes of the result. The return type and the type of the '``passthru``' operand have the same vector type.
 
+The :ref:`align <attr_align>` parameter attribute can be provided for the first
+operand.
+
 Semantics:
 """"""""""
 
@@ -24333,6 +24336,8 @@ Arguments:
 
 The first operand is the input vector, from which elements are collected and written to memory. The second operand is the base pointer for the store, it has the same underlying type as the element of the input vector operand. The third operand is the mask, a vector of boolean values. The mask and the input vector must have the same number of vector elements.
 
+The :ref:`align <attr_align>` parameter attribute can be provided for the second
+operand.
 
 Semantics:
 """"""""""



More information about the llvm-commits mailing list