[llvm] [PowerPC] fix legalization crash (PR #105563)

Wed Aug 21 11:14:35 PDT 2024

https://github.com/RolandF77 created https://github.com/llvm/llvm-project/pull/105563

If v2i64 scalar_to_vector is made custom, llc can crash in certain legalization cases where v2i64 vectors are injected, even if they weren't otherwise present. The code generated would be fine, but that operation is not handled in ReplaceNodeResults. Add handling.

>From f543a053cb873ad0a28c5d63db78f2193ba62445 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Wed, 21 Aug 2024 17:46:09 +0000
Subject: [PATCH] fix legalization crash

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  6 +++
 llvm/test/CodeGen/PowerPC/custom-stov.ll    | 60 +++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/custom-stov.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8ff9f5a5a991e0..8671eae2920c25 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12032,6 +12032,12 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
       Results.push_back(Lowered);
     return;
   }
+  case ISD::SCALAR_TO_VECTOR: {
+    SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
+    if (Lowered)
+      Results.push_back(Lowered);
+    return;
+  }
   case ISD::FSHL:
   case ISD::FSHR:
     // Don't handle funnel shifts here.
diff --git a/llvm/test/CodeGen/PowerPC/custom-stov.ll b/llvm/test/CodeGen/PowerPC/custom-stov.ll
new file mode 100644
index 00000000000000..77c626dfae1f72
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/custom-stov.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @_blah() #0 {
+; CHECK-LABEL: _blah:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    li 4, 15
+; CHECK-NEXT:    lvx 3, 0, 4
+; CHECK-NEXT:    addi 5, 1, -64
+; CHECK-NEXT:    lvx 4, 0, 3
+; CHECK-NEXT:    lvsl 2, 0, 3
+; CHECK-NEXT:    vperm 2, 4, 3, 2
+; CHECK-NEXT:    lwz 4, 16(0)
+; CHECK-NEXT:    stvx 2, 0, 5
+; CHECK-NEXT:    lhz 5, -64(1)
+; CHECK-NEXT:    lhz 6, -58(1)
+; CHECK-NEXT:    lhz 7, -52(1)
+; CHECK-NEXT:    sth 4, -34(1)
+; CHECK-NEXT:    sth 3, -36(1)
+; CHECK-NEXT:    sth 3, -40(1)
+; CHECK-NEXT:    sth 3, -44(1)
+; CHECK-NEXT:    sth 3, -48(1)
+; CHECK-NEXT:    addi 3, 1, -48
+; CHECK-NEXT:    sth 7, -38(1)
+; CHECK-NEXT:    sth 6, -42(1)
+; CHECK-NEXT:    sth 5, -46(1)
+; CHECK-NEXT:    lvx 2, 0, 3
+; CHECK-NEXT:    addi 3, 1, -32
+; CHECK-NEXT:    vsldoi 3, 2, 2, 8
+; CHECK-NEXT:    vmaxuw 2, 2, 3
+; CHECK-NEXT:    vspltw 3, 2, 1
+; CHECK-NEXT:    vmaxuw 2, 2, 3
+; CHECK-NEXT:    stvx 2, 0, 3
+; CHECK-NEXT:    lwz 3, -32(1)
+; CHECK-NEXT:    cmplwi 3, 0
+; CHECK-NEXT:    blr
+entry:
+  %wide.vec904 = load <12 x i16>, ptr null, align 2
+  %strided.vec905 = shufflevector <12 x i16> %wide.vec904, <12 x i16> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %0 = zext <4 x i16> %strided.vec905 to <4 x i32>
+  %1 = tail call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %0)
+  %cmp55.not823 = icmp ugt i32 1, %1
+  br i1 %cmp55.not823, label %for.cond.cleanup56, label %for.body57.lr.ph
+
+for.body57.lr.ph:                                 ; preds = %entry
+  ret void
+
+for.cond.cleanup56:                               ; preds = %entry
+  ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) #1
+
+attributes #0 = { "target-cpu"="ppc64" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }