[llvm] [PowerPC] fix legalization crash (PR #105563)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 12:15:50 PDT 2024
https://github.com/RolandF77 updated https://github.com/llvm/llvm-project/pull/105563
>From f543a053cb873ad0a28c5d63db78f2193ba62445 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Wed, 21 Aug 2024 17:46:09 +0000
Subject: [PATCH 1/2] fix legalization crash
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 +++
llvm/test/CodeGen/PowerPC/custom-stov.ll | 60 +++++++++++++++++++++
2 files changed, 66 insertions(+)
create mode 100644 llvm/test/CodeGen/PowerPC/custom-stov.ll
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8ff9f5a5a991e0..8671eae2920c25 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12032,6 +12032,12 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Lowered);
return;
}
+ case ISD::SCALAR_TO_VECTOR: {
+ SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
+ if (Lowered)
+ Results.push_back(Lowered);
+ return;
+ }
case ISD::FSHL:
case ISD::FSHR:
// Don't handle funnel shifts here.
diff --git a/llvm/test/CodeGen/PowerPC/custom-stov.ll b/llvm/test/CodeGen/PowerPC/custom-stov.ll
new file mode 100644
index 00000000000000..77c626dfae1f72
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/custom-stov.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @_blah() #0 {
+; CHECK-LABEL: _blah:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: li 4, 15
+; CHECK-NEXT: lvx 3, 0, 4
+; CHECK-NEXT: addi 5, 1, -64
+; CHECK-NEXT: lvx 4, 0, 3
+; CHECK-NEXT: lvsl 2, 0, 3
+; CHECK-NEXT: vperm 2, 4, 3, 2
+; CHECK-NEXT: lwz 4, 16(0)
+; CHECK-NEXT: stvx 2, 0, 5
+; CHECK-NEXT: lhz 5, -64(1)
+; CHECK-NEXT: lhz 6, -58(1)
+; CHECK-NEXT: lhz 7, -52(1)
+; CHECK-NEXT: sth 4, -34(1)
+; CHECK-NEXT: sth 3, -36(1)
+; CHECK-NEXT: sth 3, -40(1)
+; CHECK-NEXT: sth 3, -44(1)
+; CHECK-NEXT: sth 3, -48(1)
+; CHECK-NEXT: addi 3, 1, -48
+; CHECK-NEXT: sth 7, -38(1)
+; CHECK-NEXT: sth 6, -42(1)
+; CHECK-NEXT: sth 5, -46(1)
+; CHECK-NEXT: lvx 2, 0, 3
+; CHECK-NEXT: addi 3, 1, -32
+; CHECK-NEXT: vsldoi 3, 2, 2, 8
+; CHECK-NEXT: vmaxuw 2, 2, 3
+; CHECK-NEXT: vspltw 3, 2, 1
+; CHECK-NEXT: vmaxuw 2, 2, 3
+; CHECK-NEXT: stvx 2, 0, 3
+; CHECK-NEXT: lwz 3, -32(1)
+; CHECK-NEXT: cmplwi 3, 0
+; CHECK-NEXT: blr
+entry:
+ %wide.vec904 = load <12 x i16>, ptr null, align 2
+ %strided.vec905 = shufflevector <12 x i16> %wide.vec904, <12 x i16> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %0 = zext <4 x i16> %strided.vec905 to <4 x i32>
+ %1 = tail call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %0)
+ %cmp55.not823 = icmp ugt i32 1, %1
+ br i1 %cmp55.not823, label %for.cond.cleanup56, label %for.body57.lr.ph
+
+for.body57.lr.ph: ; preds = %entry
+ ret void
+
+for.cond.cleanup56: ; preds = %entry
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) #1
+
+attributes #0 = { "target-cpu"="ppc64" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
>From f1583d909678ed4c86f76466312d708f809af2be Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Fri, 23 Aug 2024 19:25:55 +0000
Subject: [PATCH 2/2] update test
---
llvm/test/CodeGen/PowerPC/custom-stov.ll | 73 +++++++++++-------------
1 file changed, 33 insertions(+), 40 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/custom-stov.ll b/llvm/test/CodeGen/PowerPC/custom-stov.ll
index 77c626dfae1f72..ebe902bf952704 100644
--- a/llvm/test/CodeGen/PowerPC/custom-stov.ll
+++ b/llvm/test/CodeGen/PowerPC/custom-stov.ll
@@ -1,42 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=ppc64 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
-target datalayout = "E-m:e-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
-target triple = "powerpc64-unknown-linux-gnu"
-
-define void @_blah() #0 {
+define void @_blah() {
; CHECK-LABEL: _blah:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: li 4, 15
-; CHECK-NEXT: lvx 3, 0, 4
-; CHECK-NEXT: addi 5, 1, -64
-; CHECK-NEXT: lvx 4, 0, 3
-; CHECK-NEXT: lvsl 2, 0, 3
-; CHECK-NEXT: vperm 2, 4, 3, 2
-; CHECK-NEXT: lwz 4, 16(0)
-; CHECK-NEXT: stvx 2, 0, 5
-; CHECK-NEXT: lhz 5, -64(1)
-; CHECK-NEXT: lhz 6, -58(1)
-; CHECK-NEXT: lhz 7, -52(1)
-; CHECK-NEXT: sth 4, -34(1)
-; CHECK-NEXT: sth 3, -36(1)
-; CHECK-NEXT: sth 3, -40(1)
-; CHECK-NEXT: sth 3, -44(1)
-; CHECK-NEXT: sth 3, -48(1)
-; CHECK-NEXT: addi 3, 1, -48
-; CHECK-NEXT: sth 7, -38(1)
-; CHECK-NEXT: sth 6, -42(1)
-; CHECK-NEXT: sth 5, -46(1)
-; CHECK-NEXT: lvx 2, 0, 3
-; CHECK-NEXT: addi 3, 1, -32
-; CHECK-NEXT: vsldoi 3, 2, 2, 8
-; CHECK-NEXT: vmaxuw 2, 2, 3
-; CHECK-NEXT: vspltw 3, 2, 1
-; CHECK-NEXT: vmaxuw 2, 2, 3
-; CHECK-NEXT: stvx 2, 0, 3
-; CHECK-NEXT: lwz 3, -32(1)
-; CHECK-NEXT: cmplwi 3, 0
+; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: li r4, 15
+; CHECK-NEXT: lvx v3, 0, r4
+; CHECK-NEXT: addi r5, r1, -64
+; CHECK-NEXT: lvx v4, 0, r3
+; CHECK-NEXT: lvsl v2, 0, r3
+; CHECK-NEXT: vperm v2, v4, v3, v2
+; CHECK-NEXT: lwz r4, 16(0)
+; CHECK-NEXT: stvx v2, 0, r5
+; CHECK-NEXT: lhz r5, -64(r1)
+; CHECK-NEXT: lhz r6, -58(r1)
+; CHECK-NEXT: lhz r7, -52(r1)
+; CHECK-NEXT: sth r4, -34(r1)
+; CHECK-NEXT: sth r3, -36(r1)
+; CHECK-NEXT: sth r3, -40(r1)
+; CHECK-NEXT: sth r3, -44(r1)
+; CHECK-NEXT: sth r3, -48(r1)
+; CHECK-NEXT: addi r3, r1, -48
+; CHECK-NEXT: sth r7, -38(r1)
+; CHECK-NEXT: sth r6, -42(r1)
+; CHECK-NEXT: sth r5, -46(r1)
+; CHECK-NEXT: lvx v2, 0, r3
+; CHECK-NEXT: addi r3, r1, -32
+; CHECK-NEXT: vsldoi v3, v2, v2, 8
+; CHECK-NEXT: vmaxuw v2, v2, v3
+; CHECK-NEXT: vspltw v3, v2, 1
+; CHECK-NEXT: vmaxuw v2, v2, v3
+; CHECK-NEXT: stvx v2, 0, r3
+; CHECK-NEXT: lwz r3, -32(r1)
+; CHECK-NEXT: cmplwi r3, 0
; CHECK-NEXT: blr
entry:
%wide.vec904 = load <12 x i16>, ptr null, align 2
@@ -53,8 +50,4 @@ for.cond.cleanup56: ; preds = %entry
ret void
}
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) #1
-
-attributes #0 = { "target-cpu"="ppc64" }
-attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
More information about the llvm-commits mailing list