[llvm] [CodeGen] Fix generating permute bytes from register pair when the initial values are undefined (PR #74437)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 15 02:37:54 PST 2024
https://github.com/mmoadeli updated https://github.com/llvm/llvm-project/pull/74437
>From 27afcf4c84f8cbd37ecdf532bc15c7a23edb19c2 Mon Sep 17 00:00:00 2001
From: m moadeli <mahmoud.moadeli at codeplay.com>
Date: Tue, 5 Dec 2023 09:20:07 +0000
Subject: [PATCH 1/2] Fix generating permute bytes from register pair when the
initial values are undefined.
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 6 ++++--
.../CodeGen/NVPTX/shuffle-vec-undef-init.ll | 18 ++++++++++++++++++
2 files changed, 22 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b975825dae4b6a..a13e2db853846c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2360,8 +2360,10 @@ SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
SDValue V2 = Op.getOperand(1);
uint32_t Selector = 0;
- for (auto I : llvm::enumerate(SVN->getMask()))
- Selector |= (I.value() << (I.index() * 4));
+ for (auto I : llvm::enumerate(SVN->getMask())) {
+ if (I.value() != -1)
+ Selector |= (I.value() << (I.index() * 4));
+ }
SDLoc DL(Op);
return DAG.getNode(NVPTXISD::PRMT, DL, MVT::v4i8, V1, V2,
diff --git a/llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll b/llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll
new file mode 100644
index 00000000000000..4f147f28e1a57e
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-FOUND
+
+define void @kernel_func(ptr %in.vec, ptr %out.vec0) nounwind {
+ entry:
+ %wide.vec = load <32 x i8>, ptr %in.vec, align 64
+ %vec0 = shufflevector <32 x i8> %wide.vec, <32 x i8> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
+ store <4 x i8> %vec0, ptr %out.vec0, align 64
+ ret void
+
+; CHECK-FOUND: prmt.b32 {{.*}} 16384;
+; CHECK-FOUND: prmt.b32 {{.*}} 64;
+; CHECK-FOUND: prmt.b32 {{.*}} 30224;
+
+; CHECK: @kernel_func
+; CHECK-NOT: prmt.b32 {{.*}} -1;
+; CHECK: -- End function
+}
>From 3452d8756f6af876f8d2335d761917facb086f56 Mon Sep 17 00:00:00 2001
From: m moadeli <mahmoud.moadeli at codeplay.com>
Date: Mon, 15 Jan 2024 10:37:13 +0000
Subject: [PATCH 2/2] Add comment to clarify -1 as a placeholder for undef
value.
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index a13e2db853846c..d5a18bfaa7e603 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2361,7 +2361,7 @@ SDValue NVPTXTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V2 = Op.getOperand(1);
uint32_t Selector = 0;
for (auto I : llvm::enumerate(SVN->getMask())) {
- if (I.value() != -1)
+ if (I.value() != -1) // -1 is a placeholder for undef.
Selector |= (I.value() << (I.index() * 4));
}
More information about the llvm-commits
mailing list