[llvm] [DAG] Teach SelectionDAGBuilder to read parameter alignment of compressstore/expandload. (PR #83763)

Yeting Kuo via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 3 23:57:09 PST 2024


https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/83763

>From 177fbeecd758e6e9e2f391568969af607687ef4b Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 11:15:21 +0800
Subject: [PATCH 1/3] [DAG] Teach SelectionDAGBuilder to read parameter
 alignment of compressstore/expandload.

Previously SelectionDAGBuilder used ABI alignment for compressstore/expandload.
This patch allows SelectionDAGBuilder to use parameter alignment, like the memory
vp intrinsics, and still uses ABI alignment for them when they don't have
alignment attributes.
---
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  4 +++
 .../CodeGen/X86/masked_compressstore_isel.ll  | 18 +++++++++-
 .../CodeGen/X86/masked_expandload_isel.ll     | 33 +++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/masked_expandload_isel.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ab2f42d2024ccc..39628f4fb3689d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4582,6 +4582,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
     Ptr = I.getArgOperand(1);
     Mask = I.getArgOperand(2);
     Alignment = std::nullopt;
+    if (MaybeAlign Align = I.getParamAlign(1))
+      Alignment = Align;
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4746,6 +4748,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
     // @llvm.masked.expandload.*(Ptr, Mask, Src0)
     Ptr = I.getArgOperand(0);
     Alignment = std::nullopt;
+    if (MaybeAlign Align = I.getParamAlign(0))
+      Alignment = Align;
     Mask = I.getArgOperand(1);
     Src0 = I.getArgOperand(2);
   };
diff --git a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
index 1851a21c8c0641..b5857b22382da0 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
@@ -7,7 +7,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: bb.0.entry:
+; CHECK-LABEL:   name: _Z3fooiPiPs
 ; CHECK:         %1:vr128x = COPY $xmm1
 ; CHECK-NEXT:    %0:vr256x = COPY $ymm0
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
@@ -16,6 +16,22 @@ entry:
 ; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 16)
 ; CHECK-NEXT:    RET 0
 
+define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
+entry:
+  %1 = trunc <8 x i32> %gepload to <8 x i16>
+  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 32 null, <8 x i1> %0)
+  ret void
+}
+
+; CHECK-LABEL:   name: _Z3foo2iPiPs
+; CHECK:         %1:vr128x = COPY $xmm1
+; CHECK-NEXT:    %0:vr256x = COPY $ymm0
+; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT:    %4:vr128x = VPMOVDWZ256rr %0
+; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 32)
+; CHECK-NEXT:    RET 0
+
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
 declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr nocapture, <8 x i1>) #1
 
diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
new file mode 100644
index 00000000000000..7f20e03d343cdb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -start-after=codegenprepare -stop-before finalize-isel | FileCheck %s
+
+define <8 x i16> @_Z3fooiPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
+entry:
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr null, <8 x i1> %0, <8 x i16> %src)
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL:   name: _Z3fooiPiPs
+; CHECK:         %1:vr128x = COPY $xmm1
+; CHECK-NEXT:    %0:vr128x = COPY $xmm0
+; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
+
+define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
+entry:
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 32 null, <8 x i1> %0, <8 x i16> %src)
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL:   name: _Z3foo2iPiPs
+; CHECK:         %1:vr128x = COPY $xmm1
+; CHECK-NEXT:    %0:vr128x = COPY $xmm0
+; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
+; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
+; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 32)
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
+declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
+
+attributes #0 = { "target-cpu"="icelake-server" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) }

>From e52aca0016eb5926b6904a1272bbc4e4ec81e9f4 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 13:46:02 +0800
Subject: [PATCH 2/3] Simplified code.

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 39628f4fb3689d..96d0f139c0a672 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4581,9 +4581,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
     Src0 = I.getArgOperand(0);
     Ptr = I.getArgOperand(1);
     Mask = I.getArgOperand(2);
-    Alignment = std::nullopt;
-    if (MaybeAlign Align = I.getParamAlign(1))
-      Alignment = Align;
+    Alignment = I.getParamAlign(1);
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4747,9 +4745,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
                                  MaybeAlign &Alignment) {
     // @llvm.masked.expandload.*(Ptr, Mask, Src0)
     Ptr = I.getArgOperand(0);
-    Alignment = std::nullopt;
-    if (MaybeAlign Align = I.getParamAlign(0))
-      Alignment = Align;
+    Alignment = I.getParamAlign(0);
     Mask = I.getArgOperand(1);
     Src0 = I.getArgOperand(2);
   };

>From de079e6cc331730717c76188b658c85c96aac0bc Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 4 Mar 2024 15:56:37 +0800
Subject: [PATCH 3/3] Use an alignment of 1 by default.

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 ++--
 llvm/test/CodeGen/X86/masked_compressstore_isel.ll    | 6 +++---
 llvm/test/CodeGen/X86/masked_expandload_isel.ll       | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 96d0f139c0a672..612402581da3c2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4581,7 +4581,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
     Src0 = I.getArgOperand(0);
     Ptr = I.getArgOperand(1);
     Mask = I.getArgOperand(2);
-    Alignment = I.getParamAlign(1);
+    Alignment = I.getParamAlign(1).valueOrOne();
   };
 
   Value  *PtrOperand, *MaskOperand, *Src0Operand;
@@ -4745,7 +4745,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
                                  MaybeAlign &Alignment) {
     // @llvm.masked.expandload.*(Ptr, Mask, Src0)
     Ptr = I.getArgOperand(0);
-    Alignment = I.getParamAlign(0);
+    Alignment = I.getParamAlign(0).valueOrOne();
     Mask = I.getArgOperand(1);
     Src0 = I.getArgOperand(2);
   };
diff --git a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
index b5857b22382da0..fb206d3412081e 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore_isel.ll
@@ -13,13 +13,13 @@ entry:
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
 ; CHECK-NEXT:    %4:vr128x = VPMOVDWZ256rr %0
-; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 16)
+; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 1)
 ; CHECK-NEXT:    RET 0
 
 define void @_Z3foo2iPiPs(<8 x i32> %gepload, <8 x i1> %0) #0 {
 entry:
   %1 = trunc <8 x i32> %gepload to <8 x i16>
-  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 32 null, <8 x i1> %0)
+  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %1, ptr align 16 null, <8 x i1> %0)
   ret void
 }
 
@@ -29,7 +29,7 @@ entry:
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
 ; CHECK-NEXT:    %4:vr128x = VPMOVDWZ256rr %0
-; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 32)
+; CHECK-NEXT:    VPCOMPRESSWZ128mrk $noreg, 1, $noreg, 0, $noreg, killed %3, killed %4 :: (store unknown-size into `ptr null`, align 16)
 ; CHECK-NEXT:    RET 0
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
diff --git a/llvm/test/CodeGen/X86/masked_expandload_isel.ll b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
index 7f20e03d343cdb..65116a75ef8529 100644
--- a/llvm/test/CodeGen/X86/masked_expandload_isel.ll
+++ b/llvm/test/CodeGen/X86/masked_expandload_isel.ll
@@ -11,11 +11,11 @@ entry:
 ; CHECK-NEXT:    %0:vr128x = COPY $xmm0
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
-; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
+; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 1)
 
 define <8 x i16> @_Z3foo2iPiPs(<8 x i16> %src, <8 x i1> %0) #0 {
 entry:
-  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 32 null, <8 x i1> %0, <8 x i16> %src)
+  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 16 null, <8 x i1> %0, <8 x i16> %src)
   ret <8 x i16> %res
 }
 
@@ -24,7 +24,7 @@ entry:
 ; CHECK-NEXT:    %0:vr128x = COPY $xmm0
 ; CHECK-NEXT:    %2:vr128x = VPSLLWZ128ri %1, 15
 ; CHECK-NEXT:    %3:vk16wm = VPMOVW2MZ128rr killed %2
-; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 32)
+; CHECK-NEXT:    %4:vr128x = VPEXPANDWZ128rmk %0, killed %3, $noreg, 1, $noreg, 0, $noreg :: (load unknown-size from `ptr null`, align 16)
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write)
 declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)



More information about the llvm-commits mailing list