[PATCH] D145583: [AArch64][SME] Fix an infinite loop in DAGCombine related to adding -force-streaming-compatible-sve flag.
Dinar Temirbulatov via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 8 05:35:20 PST 2023
dtemirbulatov created this revision.
dtemirbulatov added reviewers: sdesmalen, CarolineConcatto, david-arm, MattDevereau.
Herald added subscribers: ctetreau, steven.zhang, hiraditya, kristof.beyls, tschuett.
Herald added a project: All.
dtemirbulatov requested review of this revision.
Herald added a project: LLVM.
The compiler hits an infinite loop in DAGCombine. In force-streaming-compatible-sve mode we have custom lowering for 128-bit vector splats, and later, in DAGCombiner::SimplifyVCastOp(), the SPLAT is scalarized because we have custom lowering for SME. After that, performMulCombine() restores the SPLAT operation, so the two combines keep undoing each other.
https://reviews.llvm.org/D145583
Files:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/aarch64-force-streaming-compatible-sve.ll
Index: llvm/test/CodeGen/AArch64/aarch64-force-streaming-compatible-sve.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-force-streaming-compatible-sve.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sve -force-streaming-compatible-sve | FileCheck %s --check-prefix=CHECK
+
+define void @jpeg_add_quant_table(i32 %0, <8 x i64> %1, ptr %2) {
+; CHECK-LABEL: jpeg_add_quant_table:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z4.s, w0
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: sunpklo z4.d, z4.s
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mul z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z4.d
+; CHECK-NEXT: mul z3.d, p0/m, z3.d, z4.d
+; CHECK-NEXT: cmpgt p2.d, p0/z, z2.d, #0
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z4.d
+; CHECK-NEXT: cmpgt p1.d, p0/z, z3.d, #0
+; CHECK-NEXT: mov z3.d, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpgt p2.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpgt p0.d, p0/z, z0.d, #0
+; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.d, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: ptrue p1.s, vl2
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT: splice z3.s, p1, z3.s, z2.s
+; CHECK-NEXT: splice z1.s, p1, z1.s, z0.s
+; CHECK-NEXT: uzp1 z2.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h
+; CHECK-NEXT: and z0.h, z0.h, #0x1
+; CHECK-NEXT: str q0, [x1]
+; CHECK-NEXT: ret
+ %4 = sext i32 %0 to i64
+ %5 = insertelement <8 x i64> zeroinitializer, i64 %4, i64 0
+ %6 = shufflevector <8 x i64> %5, <8 x i64> zeroinitializer, <8 x i32> zeroinitializer
+ %7 = mul <8 x i64> %6, %1
+ %.not = icmp sgt <8 x i64> %7, zeroinitializer
+ %8 = zext <8 x i1> %.not to <8 x i16>
+ store <8 x i16> %8, ptr %2, align 2
+ ret void
+}
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1222,6 +1222,8 @@
bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
LLT Ty2) const override;
+
+ bool preferScalarizeSplat(unsigned Opc) const override;
};
namespace AArch64 {
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24495,3 +24495,10 @@
return nullptr;
}
+
+bool AArch64TargetLowering::preferScalarizeSplat(unsigned Opc) const {
+ if (Subtarget->forceStreamingCompatibleSVE() &&
+ (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND))
+ return false;
+ return true;
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D145583.503331.patch
Type: text/x-patch
Size: 3375 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230308/7109ebaa/attachment.bin>
More information about the llvm-commits
mailing list