[llvm] [NVPTX] Fix segfault with i128 types in arrays (PR #120562)
Valery Chernov via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 19 03:55:10 PST 2024
https://github.com/vvchernov created https://github.com/llvm/llvm-project/pull/120562
- Process i128 arrays with the custom ComputePTXValueVTs: recurse into the array's elements so that each i128 element is handled there and split into i64 parts.
- Add corresponding test
>From 868b4c1875a50c232d7bab7250207e7db65d5229 Mon Sep 17 00:00:00 2001
From: Valery Chernov <vchernov at nvidia.com>
Date: Mon, 9 Dec 2024 16:55:46 +0400
Subject: [PATCH] process i128 array with custom ComputePTXValueVTs
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 11 +++++++++
llvm/test/CodeGen/NVPTX/i128-array.ll | 25 +++++++++++++++++++++
2 files changed, 36 insertions(+)
create mode 100644 llvm/test/CodeGen/NVPTX/i128-array.ll
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index ce94dded815b8f..4282965c3cddea 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -198,6 +198,17 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
return;
}
+ // Given an array type, recursively traverse the elements with custom ComputePTXValueVTs.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ ComputePTXValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ }
+ return;
+ }
+
ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
diff --git a/llvm/test/CodeGen/NVPTX/i128-array.ll b/llvm/test/CodeGen/NVPTX/i128-array.ll
new file mode 100644
index 00000000000000..4054d721d04863
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i128-array.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -O0 -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+define [2 x i128] @foo(i64 %a, i32 %b) {
+; CHECK-LABEL: foo(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [foo_param_1];
+; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
+; CHECK-NEXT: shr.s64 %rd2, %rd1, 63;
+; CHECK-NEXT: cvt.s64.s32 %rd3, %r1;
+; CHECK-NEXT: shr.s64 %rd4, %rd3, 63;
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4};
+; CHECK-NEXT: ret;
+ %1 = sext i64 %a to i128 ; sign-extend the 64-bit argument to i128
+ %2 = sext i32 %b to i128 ; sign-extend the 32-bit argument to i128
+ %3 = insertvalue [ 2 x i128 ] undef, i128 %1, 0 ; element 0 of the result array
+ %4 = insertvalue [ 2 x i128 ] %3, i128 %2, 1 ; element 1 of the result array
+
+ ret [ 2 x i128 ] %4 ; returns the array by value; the expected PTX stores each i128 element as a v2.b64 pair, at offsets 0 and 16
+}
More information about the llvm-commits
mailing list