[llvm] [GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR (PR #110561)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 12:59:06 PDT 2024
https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/110561

Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE.

Credits: https://github.com/llvm/llvm-project/pull/72976

G_MUL is not part of this change; selecting it for scalable vectors still fails:

LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64)
;; mul
define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
entry:
%c = mul <vscale x 2 x i64> %a, %b
store <vscale x 2 x i64> %c, ptr %p, align 16
ret void
}
define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
entry:
%c = mul <vscale x 4 x i32> %a, %b
store <vscale x 4 x i32> %c, ptr %p, align 16
ret void
}
define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
entry:
%c = mul <vscale x 8 x i16> %a, %b
store <vscale x 8 x i16> %c, ptr %p, align 16
ret void
}
define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
entry:
%c = mul <vscale x 16 x i8> %a, %b
store <vscale x 16 x i8> %c, ptr %p, align 16
ret void
}
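
For context, the mechanism is the conditional legalFor() call added to AArch64LegalizerInfo.cpp (the full hunk is in the patch below). A condensed sketch, using the scalable LLT names (nxv16s8, nxv8s16, nxv4s32, nxv2s64) already defined in that file:

  // Sketch only: condensed from the AArch64LegalizerInfo.cpp change below.
  const bool HasSVE = ST.hasSVE();
  auto &IntegerArithmeticActions =
      getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR});
  if (HasSVE)
    // Scalable SVE vector types become legal only when the subtarget has SVE.
    IntegerArithmeticActions.legalFor({nxv16s8, nxv8s16, nxv4s32, nxv2s64});
  // The existing scalar and fixed-vector rules are kept unchanged.
  IntegerArithmeticActions
      .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
      .clampScalar(0, s32, s64);
  // G_MUL is split into its own rule set and keeps only the fixed-vector
  // types, so scalable-vector G_MUL (the xmulnxv2i64 example above) is not
  // handled by this patch.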
From 3d0d229eb8a620fb079f4c7f871d254181f8acea Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Mon, 30 Sep 2024 21:54:21 +0200
Subject: [PATCH] [GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and
 G_XOR for SVE.
Credits: https://github.com/llvm/llvm-project/pull/72976
LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64)
;; mul
define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
entry:
%c = mul <vscale x 2 x i64> %a, %b
store <vscale x 2 x i64> %c, ptr %p, align 16
ret void
}
define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
entry:
%c = mul <vscale x 4 x i32> %a, %b
store <vscale x 4 x i32> %c, ptr %p, align 16
ret void
}
define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
entry:
%c = mul <vscale x 8 x i16> %a, %b
store <vscale x 8 x i16> %c, ptr %p, align 16
ret void
}
define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
entry:
%c = mul <vscale x 16 x i8> %a, %b
store <vscale x 16 x i8> %c, ptr %p, align 16
ret void
}
---
.../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 14 +-
.../AArch64/AArch64GenRegisterBankInfo.def | 2 +
.../AArch64/GISel/AArch64CallLowering.cpp | 4 +-
.../GISel/AArch64InstructionSelector.cpp | 10 +-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 33 ++-
.../AArch64/GlobalISel/regbank-mul.mir | 25 ++
llvm/test/CodeGen/AArch64/sve-integer.ll | 268 ++++++++++++++++++
7 files changed, 347 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/regbank-mul.mir
create mode 100644 llvm/test/CodeGen/AArch64/sve-integer.ll
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 82e713f30ea31c..d42bfea2bd4438 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -970,8 +970,7 @@ class LegalizeRuleSet {
LegalizeAction::WidenScalar,
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[TypeIdx];
- return VecTy.isVector() && !VecTy.isScalable() &&
- VecTy.getSizeInBits() < VectorSize;
+ return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize;
},
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[TypeIdx];
@@ -1139,7 +1138,7 @@ class LegalizeRuleSet {
LegalizeAction::MoreElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
- return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+ return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
VecTy.getNumElements() < MinElements;
},
[=](const LegalityQuery &Query) {
@@ -1157,7 +1156,7 @@ class LegalizeRuleSet {
LegalizeAction::MoreElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
- return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+ return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
(VecTy.getNumElements() % NumElts != 0);
},
[=](const LegalityQuery &Query) {
@@ -1177,7 +1176,7 @@ class LegalizeRuleSet {
LegalizeAction::FewerElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
- return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+ return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
VecTy.getNumElements() > MaxElements;
},
[=](const LegalityQuery &Query) {
@@ -1198,6 +1197,11 @@ class LegalizeRuleSet {
assert(MinTy.getElementType() == MaxTy.getElementType() &&
"Expected element types to agree");
+ if (MinTy.isScalableVector())
+ return actionIf(LegalizeAction::Unsupported, always);
+ if (MaxTy.isScalableVector())
+ return actionIf(LegalizeAction::Unsupported, always);
+
const LLT EltTy = MinTy.getElementType();
return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements())
.clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());
diff --git a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index 82066b48c84b40..8ff59f60968beb 100644
--- a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
const unsigned MinSize = Size.getKnownMinValue();
assert((!Size.isScalable() || MinSize >= 128) &&
"Scalable vector types should have size of at least 128 bits");
+ if (Size.isScalable())
+ return 3;
if (MinSize <= 16)
return 0;
if (MinSize <= 32)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 5aee7804de3e3f..6cbfb018b3183a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -393,8 +393,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
// i1 is a special case because SDAG i1 true is naturally zero extended
// when widened using ANYEXT. We need to do it explicitly here.
auto &Flags = CurArgInfo.Flags[0];
- if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
- !Flags.isZExt()) {
+ if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
+ !Flags.isSExt() && !Flags.isZExt()) {
CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
} else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
1) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index df0c09d32c074a..afea08ab092501 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
unsigned RegBankID = RB.getID();
if (RegBankID == AArch64::GPRRegBankID) {
+ assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
if (SizeInBits <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
@@ -626,6 +627,12 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
}
if (RegBankID == AArch64::FPRRegBankID) {
+ if (SizeInBits.isScalable()) {
+ assert(SizeInBits == TypeSize::getScalable(128) &&
+ "Unexpected scalable register size");
+ return &AArch64::ZPRRegClass;
+ }
+
switch (SizeInBits) {
default:
return nullptr;
@@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
// then we can pull it into the helpers that get the appropriate class for a
// register bank. Or make a new helper that carries along some constraint
// information.
- if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+ if (SrcRegBank != DstRegBank &&
+ (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
SrcSize = DstSize = TypeSize::getFixed(32);
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 51aeee023f2e34..910a4ab2ddc178 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const bool HasCSSC = ST.hasCSSC();
const bool HasRCPC3 = ST.hasRCPC3();
+ const bool HasSVE = ST.hasSVE();
getActionDefinitionsBuilder(
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
@@ -127,7 +128,37 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+ auto &IntegerArithmeticActions =
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR});
+ if (HasSVE)
+ IntegerArithmeticActions.legalFor({nxv16s8, nxv8s16, nxv4s32, nxv2s64});
+ IntegerArithmeticActions
+ .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
+ .clampMaxNumElements(0, s8, 16)
+ .clampMaxNumElements(0, s16, 8)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 2;
+ },
+ 0, s32)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 4;
+ },
+ 0, s16)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 16;
+ },
+ 0, s8)
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+ .moreElementsToNextPow2(0);
+
+ getActionDefinitionsBuilder(G_MUL)
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-mul.mir
new file mode 100644
index 00000000000000..d2e76227741cb6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-mul.mir
@@ -0,0 +1,25 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -mattr=+sve -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: fp_inputs
+legalized: true
+body: |
+ bb.0:
+ liveins: $s0, $s1
+
+ ; CHECK-LABEL: name: fp_inputs
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:fpr(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: %rhs:fpr(<vscale x 2 x s64>) = COPY $z1
+ ; CHECK-NEXT: %res:fpr(<vscale x 2 x s64>) = G_MUL %lhs, %rhs
+ ; CHECK-NEXT: $z0 = COPY %res(<vscale x 2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $z0
+ %lhs:_(<vscale x 2 x s64>) = COPY $z0
+ %rhs:_(<vscale x 2 x s64>) = COPY $z1
+ %res:_(<vscale x 2 x s64>) = G_MUL %lhs, %rhs
+ $z0 = COPY %res(<vscale x 2 x s64>)
+ RET_ReallyLR implicit $z0
+
diff --git a/llvm/test/CodeGen/AArch64/sve-integer.ll b/llvm/test/CodeGen/AArch64/sve-integer.ll
new file mode 100644
index 00000000000000..ad66190839ce0f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-integer.ll
@@ -0,0 +1,268 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+;; add
+define void @addnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: addnxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = add <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @addnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: addnxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = add <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @addnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: addnxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add z0.h, z0.h, z1.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = add <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @addnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: addnxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add z0.b, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = add <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
+
+;; sub
+define void @subnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: subnxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = sub <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @subnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: subnxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub z0.s, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = sub <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @subnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: subnxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub z0.h, z0.h, z1.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = sub <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @subnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: subnxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub z0.b, z0.b, z1.b
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = sub <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
+
+;; and
+define void @andnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: andnxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = and <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @andnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: andnxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = and <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @andnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: andnxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = and <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @andnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: andnxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = and <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
+
+;; or
+define void @ornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: ornxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = or <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @ornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: ornxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = or <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @ornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: ornxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = or <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @ornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: ornxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = or <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}
+
+;; xor
+define void @xornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
+; CHECK-LABEL: xornxv2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = xor <vscale x 2 x i64> %a, %b
+ store <vscale x 2 x i64> %c, ptr %p, align 16
+ ret void
+}
+
+define void @xornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
+; CHECK-LABEL: xornxv4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = xor <vscale x 4 x i32> %a, %b
+ store <vscale x 4 x i32> %c, ptr %p, align 16
+ ret void
+}
+
+define void @xornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: xornxv8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = xor <vscale x 8 x i16> %a, %b
+ store <vscale x 8 x i16> %c, ptr %p, align 16
+ ret void
+}
+
+define void @xornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) {
+; CHECK-LABEL: xornxv16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %c = xor <vscale x 16 x i8> %a, %b
+ store <vscale x 16 x i8> %c, ptr %p, align 16
+ ret void
+}