[llvm] ca9b7e2 - [AArch64][SVE] Fix crash with icmp+select
Caroline Concatto via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 21 06:16:44 PDT 2021
Author: Caroline Concatto
Date: 2021-04-21T14:16:27+01:00
New Revision: ca9b7e2e2f0897dade16d785dafadbf75deaf405
URL: https://github.com/llvm/llvm-project/commit/ca9b7e2e2f0897dade16d785dafadbf75deaf405
DIFF: https://github.com/llvm/llvm-project/commit/ca9b7e2e2f0897dade16d785dafadbf75deaf405.diff
LOG: [AArch64][SVE] Fix crash with icmp+select
This patch changes the lowering action of SELECT_CC from Legal to Expand for
scalable vector types and adds support for scalable vectors in
performSelectCombine.

When choosing nodes to lower, visitSELECT checks whether a SETCC followed by a
SELECT can be replaced by a single SELECT_CC, and performs the replacement
whenever SELECT_CC is Legal or Custom for the result type. SELECT_CC was (by
default) Legal for scalable vector types, so the combiner formed SELECT_CC
nodes even though there is no lowering support for SELECT_CC with scalable
vectors, crashing the compiler. With SELECT_CC marked Expand for scalable
vectors, the compiler now lowers these selects to VSELECT instead.
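
For context, the fold in DAGCombiner's visitSELECT only fires when the target
reports SELECT_CC as Legal or Custom for the result type, and operation
actions default to Legal unless the target overrides them. The self-contained
C++ sketch below models that legality query; the names mirror LLVM's
TargetLowering API (setOperationAction, isOperationLegalOrCustom), but it is
an illustrative toy, not the in-tree implementation:

  // Toy model of the legality query gating the SELECT(SETCC) -> SELECT_CC
  // fold. An unset action defaults to Legal, which is why scalable vector
  // types slipped through before this patch. (Sketch only, not LLVM code.)
  #include <cassert>
  #include <map>

  enum class LegalizeAction { Legal, Custom, Expand };

  struct ToyTargetLowering {
    std::map<int, LegalizeAction> SelectCCAction; // keyed by a type id

    void setOperationAction(int VT, LegalizeAction A) {
      SelectCCAction[VT] = A;
    }

    bool isOperationLegalOrCustom(int VT) const {
      auto It = SelectCCAction.find(VT);
      LegalizeAction A =
          It == SelectCCAction.end() ? LegalizeAction::Legal : It->second;
      return A == LegalizeAction::Legal || A == LegalizeAction::Custom;
    }
  };

  int main() {
    const int nxv4i32 = 0; // stand-in for a scalable vector type id
    ToyTargetLowering TLI;

    // Before this patch: nothing set, so SELECT_CC looks Legal and the
    // combiner forms a SELECT_CC node the backend cannot lower.
    assert(TLI.isOperationLegalOrCustom(nxv4i32));

    // After this patch: SELECT_CC is Expand for scalable vectors, the
    // fold is skipped, and the select is lowered via VSELECT instead.
    TLI.setOperationAction(nxv4i32, LegalizeAction::Expand);
    assert(!TLI.isOperationLegalOrCustom(nxv4i32));
    return 0;
  }

Marking the action Expand per scalable vector type, as the hunks below do,
makes this query fail, so the combiner keeps the SELECT node and the usual
VSELECT-based lowering applies.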
Differential Revision: https://reviews.llvm.org/D100485
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 22e377ea0b74..fd7aa68f03c7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1153,6 +1153,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
}
// Illegal unpacked integer vector types.
@@ -1171,6 +1172,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
@@ -1218,6 +1221,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
@@ -15326,6 +15331,9 @@ static SDValue performSelectCombine(SDNode *N,
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
+ if (ResVT.isScalableVector())
+ return SDValue();
+
// Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
// scalar SetCCResultType. We also don't expect vectors, because we assume
// that selects fed by vector SETCCs are canonicalized to VSELECT.
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 327c7e540086..37a22d3a16aa 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1402,7 +1402,9 @@ multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _D)>;
def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _D)>;
def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
index bbc879c34985..55f5f33d4d48 100644
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -216,3 +216,198 @@ define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p,
%sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %dst
ret <vscale x 2 x double> %sel
}
+
+; Check icmp+select
+
+define <vscale x 2 x half> @icmp_select_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv2f16
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 2 x half> %a, <vscale x 2 x half> %b
+ ret <vscale x 2 x half> %sel
+}
+
+define <vscale x 2 x float> @icmp_select_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv2f32
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 2 x float> %a, <vscale x 2 x float> %b
+ ret <vscale x 2 x float> %sel
+}
+
+define <vscale x 2 x double> @icmp_select_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv2f64
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %b
+ ret <vscale x 2 x double> %sel
+}
+
+define <vscale x 4 x half> @icmp_select_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv4f16
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 4 x half> %a, <vscale x 4 x half> %b
+ ret <vscale x 4 x half> %sel
+}
+
+define <vscale x 4 x float> @icmp_select_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv4f32
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 4 x float> %a, <vscale x 4 x float> %b
+ ret <vscale x 4 x float> %sel
+}
+
+define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv8f16
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 8 x half> %a, <vscale x 8 x half> %b
+ ret <vscale x 8 x half> %sel
+}
+
+define <vscale x 2 x i64> @icmp_select_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv2i64
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.d, xzr, x8
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+ ret <vscale x 2 x i64> %sel
+}
+
+define <vscale x 4 x i32> @icmp_select_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv4i32
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.s, xzr, x8
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+ ret <vscale x 4 x i32> %sel
+}
+
+define <vscale x 8 x i16> @icmp_select_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv8i16
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.h, xzr, x8
+; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+ ret <vscale x 8 x i16> %sel
+}
+
+define <vscale x 16 x i8> @icmp_select_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv16i8
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.b, xzr, x8
+; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+ ret <vscale x 16 x i8> %sel
+}
+
+define <vscale x 2 x i1> @icmp_select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv2i1
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p2.d, xzr, x8
+; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
+ ret <vscale x 2 x i1> %sel
+}
+define <vscale x 4 x i1> @icmp_select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv4i1
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p2.s, xzr, x8
+; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
+ ret <vscale x 4 x i1> %sel
+}
+define <vscale x 8 x i1> @icmp_select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv8i1
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p2.h, xzr, x8
+; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
+ ret <vscale x 8 x i1> %sel
+}
+define <vscale x 16 x i1> @icmp_select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i64 %x0) {
+; CHECK-LABEL: icmp_select_nxv16i1
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p2.b, xzr, x8
+; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
+; CHECK-NEXT: ret
+ %mask = icmp eq i64 %x0, 0
+ %sel = select i1 %mask, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
+ ret <vscale x 16 x i1> %sel
+}