[llvm] [NVPTX] expand trunk v2i32->v2i16 (PR #161715)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 2 11:20:39 PDT 2025
https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/161715
>From d10c4152c56522ad175663d0ee55a2bd48ab198e Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Thu, 2 Oct 2025 10:57:24 -0700
Subject: [PATCH 1/2] [NVPTX] expand trunc v2i32->v2i16
#153478 made v2i32 legal on newer GPUs, but we can not lower all operations yet.
Expand the `trunc` operation until we implement efficient lowering.
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3ac7c2874408b..48e539037dcc7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -638,6 +638,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// No support for these operations with v2f32/v2i32
setOperationAction(ISD::INSERT_VECTOR_ELT, {MVT::v2f32, MVT::v2i32}, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, {MVT::v2f32, MVT::v2i32}, Expand);
+
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Expand);
+
// Need custom lowering in case the index is dynamic.
if (STI.hasF32x2Instructions())
setOperationAction(ISD::EXTRACT_VECTOR_ELT, {MVT::v2f32, MVT::v2i32},
>From 23da2a5c5b5938aa974755ba269dad45f0e39840 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Thu, 2 Oct 2025 11:10:52 -0700
Subject: [PATCH 2/2] added a test case
---
.../test/CodeGen/NVPTX/f32x2-convert-i32x2.ll | 37 +++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll b/llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll
index 18fb87935d17d..4bfae5c437b85 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-convert-i32x2.ll
@@ -115,5 +115,42 @@ define ptx_kernel void @inlineasm(ptr %p) {
store <2 x float> %mul, ptr %p, align 8
ret void
}
+
+define ptx_kernel void @trunc_v2i32(<2 x i32> %0) {
+; CHECK-SM90A-LABEL: trunc_v2i32(
+; CHECK-SM90A: {
+; CHECK-SM90A-NEXT: .reg .b32 %r<7>;
+; CHECK-SM90A-NEXT: .reg .b64 %rd<2>;
+; CHECK-SM90A-EMPTY:
+; CHECK-SM90A-NEXT: // %bb.0:
+; CHECK-SM90A-NEXT: ld.param.v2.b32 {%r1, %r2}, [trunc_v2i32_param_0];
+; CHECK-SM90A-NEXT: prmt.b32 %r3, %r1, %r2, 0x3340U;
+; CHECK-SM90A-NEXT: mov.b32 %r4, 0;
+; CHECK-SM90A-NEXT: prmt.b32 %r5, %r4, 0, 0x3340U;
+; CHECK-SM90A-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
+; CHECK-SM90A-NEXT: mov.b64 %rd1, 0;
+; CHECK-SM90A-NEXT: st.b32 [%rd1], %r6;
+; CHECK-SM90A-NEXT: ret;
+;
+; CHECK-SM100-LABEL: trunc_v2i32(
+; CHECK-SM100: {
+; CHECK-SM100-NEXT: .reg .b32 %r<7>;
+; CHECK-SM100-NEXT: .reg .b64 %rd<3>;
+; CHECK-SM100-EMPTY:
+; CHECK-SM100-NEXT: // %bb.0:
+; CHECK-SM100-NEXT: ld.param.b64 %rd1, [trunc_v2i32_param_0];
+; CHECK-SM100-NEXT: mov.b64 {%r1, %r2}, %rd1;
+; CHECK-SM100-NEXT: mov.b32 %r3, 0;
+; CHECK-SM100-NEXT: prmt.b32 %r4, %r3, 0, 0x3340U;
+; CHECK-SM100-NEXT: prmt.b32 %r5, %r1, %r2, 0x3340U;
+; CHECK-SM100-NEXT: prmt.b32 %r6, %r4, %r5, 0x5410U;
+; CHECK-SM100-NEXT: mov.b64 %rd2, 0;
+; CHECK-SM100-NEXT: st.b32 [%rd2], %r6;
+; CHECK-SM100-NEXT: ret;
+ %2 = trunc <2 x i32> %0 to <2 x i8>
+ %3 = shufflevector <2 x i8> zeroinitializer, <2 x i8> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x i8> %3, ptr null, align 4
+ ret void
+}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
More information about the llvm-commits
mailing list