[llvm] [CodeGen] Fix generating permute bytes from register pair when the initial values are undefined (PR #74437)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 15 02:37:54 PST 2024


https://github.com/mmoadeli updated https://github.com/llvm/llvm-project/pull/74437

>From 27afcf4c84f8cbd37ecdf532bc15c7a23edb19c2 Mon Sep 17 00:00:00 2001
From: m moadeli <mahmoud.moadeli at codeplay.com>
Date: Tue, 5 Dec 2023 09:20:07 +0000
Subject: [PATCH 1/2] Fix generating permute bytes from register pair when the
 initial values are undefined.

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp    |  6 ++++--
 .../CodeGen/NVPTX/shuffle-vec-undef-init.ll    | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b975825dae4b6a..a13e2db853846c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2360,8 +2360,10 @@ SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
   SDValue V2 = Op.getOperand(1);
   uint32_t Selector = 0;
-  for (auto I : llvm::enumerate(SVN->getMask()))
-    Selector |= (I.value() << (I.index() * 4));
+  for (auto I : llvm::enumerate(SVN->getMask())) {
+    if (I.value() != -1)
+      Selector |= (I.value() << (I.index() * 4));
+  }
 
   SDLoc DL(Op);
   return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2,
diff --git a/llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll b/llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll
new file mode 100644
index 00000000000000..4f147f28e1a57e
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s  
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s   -check-prefix=CHECK-FOUND
+
+define void @kernel_func(ptr %in.vec, ptr %out.vec0) nounwind {
+  entry:
+  %wide.vec = load <32 x i8>, ptr %in.vec, align 64
+  %vec0 = shufflevector <32 x i8> %wide.vec, <32 x i8> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
+  store <4 x i8> %vec0, ptr %out.vec0, align 64
+  ret void
+
+; CHECK-FOUND: prmt.b32 	{{.*}} 16384;
+; CHECK-FOUND: prmt.b32 	{{.*}} 64;
+; CHECK-FOUND: prmt.b32 	{{.*}} 30224;
+
+; CHECK:  @kernel_func
+; CHECK-NOT: 	prmt.b32 	{{.*}} -1;
+; CHECK:  -- End function
+}

>From 3452d8756f6af876f8d2335d761917facb086f56 Mon Sep 17 00:00:00 2001
From: m moadeli <mahmoud.moadeli at codeplay.com>
Date: Mon, 15 Jan 2024 10:37:13 +0000
Subject: [PATCH 2/2] Add comment to clarify -1 as a placeholder for undef
 value.

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index a13e2db853846c..d5a18bfaa7e603 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2361,7 +2361,7 @@ SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   SDValue V2 = Op.getOperand(1);
   uint32_t Selector = 0;
   for (auto I : llvm::enumerate(SVN->getMask())) {
-    if (I.value() != -1)
+    if (I.value() != -1) // -1 is a placeholder for undef.
       Selector |= (I.value() << (I.index() * 4));
   }
 



More information about the llvm-commits mailing list