[llvm] 8f5db53 - [AArch64][GlobalISel] Add support for some across-vector NEON intrinsics
Vladislav Dzhidzhoev via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 5 04:02:50 PDT 2023
Author: Vladislav Dzhidzhoev
Date: 2023-04-05T12:59:55+02:00
New Revision: 8f5db5332b2a4e9465f12e3192120eb28935e1a0
URL: https://github.com/llvm/llvm-project/commit/8f5db5332b2a4e9465f12e3192120eb28935e1a0
DIFF: https://github.com/llvm/llvm-project/commit/8f5db5332b2a4e9465f12e3192120eb28935e1a0.diff
LOG: [AArch64][GlobalISel] Add support for some across-vector NEON intrinsics
Support the uaddv, saddv, umaxv, smaxv, uminv, sminv, fmaxv, fminv,
fmaxnmv, and fminnmv intrinsics in GlobalISel.
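For illustration, a reduction like the following (a minimal sketch; the
function name and the choice of umaxv are illustrative, not taken from the
tests) is now selected by GlobalISel instead of falling back to SelectionDAG:

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)

define i32 @umaxv_example(<4 x i32> %v) {
  ; Expected to lower to "umaxv s0, v0.4s" followed by "fmov w0, s0".
  %r = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %v)
  ret i32 %r
}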
GlobalISelEmitter couldn't import SelectionDAG patterns containing nodes
with an 8-bit result type, since those nodes carried untyped values.
Therefore, the register type of FPR8 is set to i8 to eliminate the untyped
nodes from these patterns.
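As an example of an affected 8-bit-result pattern, a sketch similar to the
add_B case in aarch64-addv.ll (the function name here is illustrative):

declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)

define i8 @add_v8i8(<8 x i8> %v) {
  ; ADDVv8i8v writes an FPR8 (B) register; with FPR8 typed as i8, the
  ; importer can now give the pattern's result node a type and import it.
  %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v)
  ret i8 %r
}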
Differential Revision: https://reviews.llvm.org/D146531
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrGISel.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64RegisterInfo.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/aarch64-addv.ll
llvm/test/CodeGen/AArch64/arm64-fminv.ll
llvm/test/CodeGen/AArch64/arm64-neon-across.ll
llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 70c4ba763a342..b3d093af1c165 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -302,3 +302,108 @@ def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
(STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
(STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
+
+multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+ def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+
+ def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ (i64 0)))>;
+ def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
+
+ def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+ ssub))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedIntrinsicBHS<string baseOpc,
+ Intrinsic intOp> {
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+ def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ ssub))>;
+
+ def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ ssub))>;
+ def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ ssub))>;
+
+ def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+ ssub))>;
+}
+
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"ADDV", int_aarch64_neon_saddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"ADDV", int_aarch64_neon_uaddv>;
+def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMAXV", int_aarch64_neon_smaxv>;
+def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMINV", int_aarch64_neon_sminv>;
+def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (SMINPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMAXV", int_aarch64_neon_umaxv>;
+def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMINV", int_aarch64_neon_uminv>;
+def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (UMINPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4162da5f5f3c6..8db0dd9291f3d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2804,7 +2804,7 @@ defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
// Floating-point
-defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>;
+defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", i8, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
@@ -3569,7 +3569,7 @@ defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
// Floating-point
-defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
+defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", i8, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
@@ -3979,7 +3979,7 @@ defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
-def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>;
+def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
@@ -4033,7 +4033,7 @@ def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
-def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>;
+def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 9f26bd732609e..4bb1f9413f2ba 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -435,7 +435,7 @@ def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
}
-def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
+def FPR8 : RegisterClass<"AArch64", [i8], 8, (sequence "B%u", 0, 31)> {
let Size = 8;
}
def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index ba33e9cfe949c..af3825eb3997f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3166,15 +3166,15 @@ let Predicates = [HasSVEorSME] in {
let Predicates = [NotInStreamingSVEMode] in {
def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
(i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
- def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8),
+ def : Pat<(sext_inreg (anyext (i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index))), i8),
(i64 (SMOVvi8to64 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
def : Pat<(sext_inreg (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index), i16),
(i32 (SMOVvi16to32 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
- def : Pat<(sext_inreg (anyext (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), i16),
+ def : Pat<(sext_inreg (anyext (i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index))), i16),
(i64 (SMOVvi16to64 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
- def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+ def : Pat<(sext (i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index))),
(i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
} // End NotInStreamingSVEMode
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 822a7b1e948fd..ebe870d870f52 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -481,14 +481,35 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
getValueMapping(RBIdx, Size), NumOperands);
}
-/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
-static bool isFPIntrinsic(unsigned ID) {
+/// \returns true if a given intrinsic only uses and defines FPRs.
+static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC);
// TODO: Add more intrinsics.
- switch (ID) {
+ switch (MI.getIntrinsicID()) {
default:
return false;
case Intrinsic::aarch64_neon_uaddlv:
+ case Intrinsic::aarch64_neon_uaddv:
+ case Intrinsic::aarch64_neon_umaxv:
+ case Intrinsic::aarch64_neon_uminv:
+ case Intrinsic::aarch64_neon_fmaxv:
+ case Intrinsic::aarch64_neon_fminv:
+ case Intrinsic::aarch64_neon_fmaxnmv:
+ case Intrinsic::aarch64_neon_fminnmv:
return true;
+ case Intrinsic::aarch64_neon_saddlv: {
+ const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+ return SrcTy.getElementType().getSizeInBits() >= 16 &&
+ SrcTy.getElementCount().getFixedValue() >= 4;
+ }
+ case Intrinsic::aarch64_neon_saddv:
+ case Intrinsic::aarch64_neon_smaxv:
+ case Intrinsic::aarch64_neon_sminv: {
+ const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+ return SrcTy.getElementType().getSizeInBits() >= 32 &&
+ SrcTy.getElementCount().getFixedValue() >= 2;
+ }
}
}
@@ -497,7 +518,7 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
unsigned Op = MI.getOpcode();
- if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
+ if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
return true;
// Do we have an explicit floating point instruction?
@@ -996,9 +1017,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_INTRINSIC: {
// Check if we know that the intrinsic has any constraints on its register
// banks. If it does, then update the mapping accordingly.
- unsigned ID = MI.getIntrinsicID();
unsigned Idx = 0;
- if (!isFPIntrinsic(ID))
+ if (!isFPIntrinsic(MRI, MI))
break;
for (const auto &Op : MI.explicit_operands()) {
if (Op.isReg())
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index 15736933b61f8..38548e760ac9f 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s -check-prefixes=CHECK,SDAG
+; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=aarch64-eabi -aarch64-neon-syntax=generic 2>&1 | FileCheck %s --check-prefixes=CHECK,GISEL
; Function Attrs: nounwind readnone
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
@@ -9,6 +10,14 @@ declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
+; GISEL-NOT: Instruction selection used fallback path for add_B
+; GISEL-NOT: Instruction selection used fallback path for add_H
+; GISEL-NOT: Instruction selection used fallback path for add_S
+; GISEL-NOT: Instruction selection used fallback path for add_D
+; GISEL-NOT: Instruction selection used fallback path for oversized_ADDV_512
+; GISEL-NOT: Instruction selection used fallback path for addv_combine_i32
+; GISEL-NOT: Instruction selection used fallback path for addv_combine_i64
+
define i8 @add_B(ptr %arr) {
; CHECK-LABEL: add_B:
; CHECK: // %bb.0:
@@ -84,16 +93,27 @@ entry:
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
define i32 @oversized_ADDV_512(ptr %arr) {
-; CHECK-LABEL: oversized_ADDV_512:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
-; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: addv s0, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: oversized_ADDV_512:
+; SDAG: // %bb.0:
+; SDAG-NEXT: ldp q0, q1, [x0, #32]
+; SDAG-NEXT: ldp q3, q2, [x0]
+; SDAG-NEXT: add v0.4s, v3.4s, v0.4s
+; SDAG-NEXT: add v1.4s, v2.4s, v1.4s
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: oversized_ADDV_512:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldp q0, q1, [x0]
+; GISEL-NEXT: ldp q2, q3, [x0, #32]
+; GISEL-NEXT: add v0.4s, v0.4s, v1.4s
+; GISEL-NEXT: add v1.4s, v2.4s, v3.4s
+; GISEL-NEXT: add v0.4s, v0.4s, v1.4s
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: fmov w0, s0
+; GISEL-NEXT: ret
%bin.rdx = load <16 x i32>, ptr %arr
%r = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
ret i32 %r
@@ -128,12 +148,21 @@ entry:
}
define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {
-; CHECK-LABEL: addv_combine_i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: addv s0, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: addv_combine_i32:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: addv_combine_i32:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: addv s1, v1.4s
+; GISEL-NEXT: fmov w8, s0
+; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: add w0, w8, w9
+; GISEL-NEXT: ret
entry:
%rdx.1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a1)
%rdx.2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2)
@@ -142,12 +171,21 @@ entry:
}
define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {
-; CHECK-LABEL: addv_combine_i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; SDAG-LABEL: addv_combine_i64:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.2d, v0.2d, v1.2d
+; SDAG-NEXT: addp d0, v0.2d
+; SDAG-NEXT: fmov x0, d0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: addv_combine_i64:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: addp d0, v0.2d
+; GISEL-NEXT: addp d1, v1.2d
+; GISEL-NEXT: fmov x8, d0
+; GISEL-NEXT: fmov x9, d1
+; GISEL-NEXT: add x0, x8, x9
+; GISEL-NEXT: ret
entry:
%rdx.1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1)
%rdx.2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a2)
diff --git a/llvm/test/CodeGen/AArch64/arm64-fminv.ll b/llvm/test/CodeGen/AArch64/arm64-fminv.ll
index f4c97355dd197..90076a2b66f94 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fminv.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fminv.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
define float @test_fminv_v2f32(<2 x float> %in) {
; CHECK: test_fminv_v2f32:
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-across.ll b/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
index 3a63673f12094..df5d9c06d0380 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-across.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -global-isel=1 -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index 10e5e8bd45aef..f95fe77997d77 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>)