[llvm] [NVPTX] Promote v2i8 to v2i16 (PR #111189)

Manasij Mukherjee via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 4 11:23:12 PDT 2024


https://github.com/manasij7479 created https://github.com/llvm/llvm-project/pull/111189

Promote v2i8 to v2i16, which fixes a crash.
Re-enable the previously crashing `byte2` test in NVPTX/vector-returns.ll.

https://github.com/llvm/llvm-project/issues/104864

From 096cd9b20459d582c369a9ab2d75476f0bd7cb13 Mon Sep 17 00:00:00 2001
From: Manasij Mukherjee <manasijm at nvidia.com>
Date: Tue, 3 Sep 2024 14:00:32 -0600
Subject: [PATCH] [NVPTX] Promote v2i8 to v2i16

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp |  4 ++++
 llvm/test/CodeGen/NVPTX/vector-returns.ll   | 15 +++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8718b7890bf58a..57bc5fe0ac361c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -236,6 +236,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
         // v*i8 are formally lowered as v4i8
         EltVT = MVT::v4i8;
         NumElts = (NumElts + 3) / 4;
+      } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {
+        // v2i8 is promoted to v2i16
+        NumElts = 1;
+        EltVT = MVT::v2i16;
       }
       for (unsigned j = 0; j != NumElts; ++j) {
         ValueVTs.push_back(EltVT);
diff --git a/llvm/test/CodeGen/NVPTX/vector-returns.ll b/llvm/test/CodeGen/NVPTX/vector-returns.ll
index 0d2ad2c9bee750..956f74392ae130 100644
--- a/llvm/test/CodeGen/NVPTX/vector-returns.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-returns.ll
@@ -325,10 +325,17 @@ define <3 x i8> @byte3() {
   ret <3 x i8> zeroinitializer
 }
 
-; FIXME: This test causes a crash. 
-; define <2 x i8> @byte2() {
-;   ret <2 x i8> zeroinitializer
-; }
+define <2 x i8> @byte2() {
+; CHECK-LABEL: byte2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b32 %r1, 0;
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
+  ret <2 x i8> zeroinitializer
+}
 
 define <1 x i8> @byte1() {
 ; CHECK-LABEL: byte1(



More information about the llvm-commits mailing list