[llvm] [AArch64] Combine (and/or X, (dup (not Y))) -> (bic/orn X, (dup Y)) (PR #175739)
Piotr Fusik via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 00:32:54 PST 2026
https://github.com/pfusik updated https://github.com/llvm/llvm-project/pull/175739
From 8b2e680a59e0e055b29a267242c224760a890d51 Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Tue, 13 Jan 2026 11:39:21 +0100
Subject: [PATCH 1/4] [AArch64][test] Combine (and/or X, (dup (not Y))) -> (bic/orn X, (dup Y))
---
.../AArch64/neon-bitwise-instructions.ll | 364 ++++++++++++++++++
1 file changed, 364 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 01aea72d77114..3aebedd2bdb1c 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -2868,3 +2868,367 @@ entry:
%vqaddq_v2.i26515 = tail call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >, <8 x i16> %vbsl5.i)
ret <8 x i16> %vqaddq_v2.i26515
}
+
+define <8 x i8> @and_dup_not_v8i8(<8 x i8> %a, i8 %m) {
+; CHECK-LABEL: and_dup_not_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.8b, w8
+; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %not = xor i8 %m, -1
+ %insert = insertelement <8 x i8> poison, i8 %not, i64 0
+ %shuffle = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
+ %and = and <8 x i8> %a, %shuffle
+ ret <8 x i8> %and
+}
+
+define <8 x i8> @and_dup_not_v8i8_swapped(<8 x i8> %a, i8 %m) {
+; CHECK-LABEL: and_dup_not_v8i8_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.8b, w8
+; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ret
+ %not = xor i8 %m, -1
+ %insert = insertelement <8 x i8> poison, i8 %not, i64 0
+ %shuffle = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
+ %and = and <8 x i8> %shuffle, %a
+ ret <8 x i8> %and
+}
+
+define <16 x i8> @and_dup_not_v16i8(<16 x i8> %a, i8 %m) {
+; CHECK-LABEL: and_dup_not_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.16b, w8
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %not = xor i8 %m, -1
+ %insert = insertelement <16 x i8> poison, i8 %not, i64 0
+ %shuffle = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+ %and = and <16 x i8> %a, %shuffle
+ ret <16 x i8> %and
+}
+
+define <16 x i8> @and_dup_not_v16i8_swapped(<16 x i8> %a, i8 %m) {
+; CHECK-LABEL: and_dup_not_v16i8_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.16b, w8
+; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: ret
+ %not = xor i8 %m, -1
+ %insert = insertelement <16 x i8> poison, i8 %not, i64 0
+ %shuffle = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+ %and = and <16 x i8> %shuffle, %a
+ ret <16 x i8> %and
+}
+
+define <4 x i16> @and_dup_not_v4i16(<4 x i16> %a, i16 %m) {
+; CHECK-LABEL: and_dup_not_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.4h, w8
+; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %not = xor i16 %m, -1
+ %insert = insertelement <4 x i16> poison, i16 %not, i64 0
+ %shuffle = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
+ %and = and <4 x i16> %a, %shuffle
+ ret <4 x i16> %and
+}
+
+define <4 x i16> @and_dup_not_v4i16_swapped(<4 x i16> %a, i16 %m) {
+; CHECK-LABEL: and_dup_not_v4i16_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.4h, w8
+; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ret
+ %not = xor i16 %m, -1
+ %insert = insertelement <4 x i16> poison, i16 %not, i64 0
+ %shuffle = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
+ %and = and <4 x i16> %shuffle, %a
+ ret <4 x i16> %and
+}
+
+define <8 x i16> @and_dup_not_v8i16(<8 x i16> %a, i16 %m) {
+; CHECK-LABEL: and_dup_not_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %not = xor i16 %m, -1
+ %insert = insertelement <8 x i16> poison, i16 %not, i64 0
+ %shuffle = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+ %and = and <8 x i16> %a, %shuffle
+ ret <8 x i16> %and
+}
+
+define <8 x i16> @and_dup_not_v8i16_swapped(<8 x i16> %a, i16 %m) {
+; CHECK-LABEL: and_dup_not_v8i16_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: ret
+ %not = xor i16 %m, -1
+ %insert = insertelement <8 x i16> poison, i16 %not, i64 0
+ %shuffle = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+ %and = and <8 x i16> %shuffle, %a
+ ret <8 x i16> %and
+}
+
+define <2 x i32> @and_dup_not_v2i32(<2 x i32> %a, i32 %m) {
+; CHECK-LABEL: and_dup_not_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.2s, w8
+; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %not = xor i32 %m, -1
+ %insert = insertelement <2 x i32> poison, i32 %not, i64 0
+ %shuffle = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
+ %and = and <2 x i32> %a, %shuffle
+ ret <2 x i32> %and
+}
+
+define <2 x i32> @and_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
+; CHECK-LABEL: and_dup_not_v2i32_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.2s, w8
+; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ret
+ %not = xor i32 %m, -1
+ %insert = insertelement <2 x i32> poison, i32 %not, i64 0
+ %shuffle = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
+ %and = and <2 x i32> %shuffle, %a
+ ret <2 x i32> %and
+}
+
+define <4 x i32> @and_dup_not_v4i32(<4 x i32> %a, i32 %m) {
+; CHECK-LABEL: and_dup_not_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %not = xor i32 %m, -1
+ %insert = insertelement <4 x i32> poison, i32 %not, i64 0
+ %shuffle = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
+ %and = and <4 x i32> %a, %shuffle
+ ret <4 x i32> %and
+}
+
+define <4 x i32> @and_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
+; CHECK-LABEL: and_dup_not_v4i32_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: ret
+ %not = xor i32 %m, -1
+ %insert = insertelement <4 x i32> poison, i32 %not, i64 0
+ %shuffle = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
+ %and = and <4 x i32> %shuffle, %a
+ ret <4 x i32> %and
+}
+
+define <2 x i64> @and_dup_not_v2i64(<2 x i64> %a, i64 %m) {
+; CHECK-LABEL: and_dup_not_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn x8, x0
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %not = xor i64 %m, -1
+ %insert = insertelement <2 x i64> poison, i64 %not, i64 0
+ %shuffle = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+ %and = and <2 x i64> %a, %shuffle
+ ret <2 x i64> %and
+}
+
+define <8 x i8> @or_dup_not_v8i8(<8 x i8> %a, i8 %m) {
+; CHECK-LABEL: or_dup_not_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.8b, w8
+; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %not = xor i8 %m, -1
+ %insert = insertelement <8 x i8> poison, i8 %not, i64 0
+ %shuffle = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
+ %or = or <8 x i8> %a, %shuffle
+ ret <8 x i8> %or
+}
+
+define <8 x i8> @or_dup_not_v8i8_swapped(<8 x i8> %a, i8 %m) {
+; CHECK-LABEL: or_dup_not_v8i8_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.8b, w8
+; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ret
+ %not = xor i8 %m, -1
+ %insert = insertelement <8 x i8> poison, i8 %not, i64 0
+ %shuffle = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
+ %or = or <8 x i8> %shuffle, %a
+ ret <8 x i8> %or
+}
+
+define <16 x i8> @or_dup_not_v16i8(<16 x i8> %a, i8 %m) {
+; CHECK-LABEL: or_dup_not_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.16b, w8
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %not = xor i8 %m, -1
+ %insert = insertelement <16 x i8> poison, i8 %not, i64 0
+ %shuffle = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+ %or = or <16 x i8> %a, %shuffle
+ ret <16 x i8> %or
+}
+
+define <16 x i8> @or_dup_not_v16i8_swapped(<16 x i8> %a, i8 %m) {
+; CHECK-LABEL: or_dup_not_v16i8_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.16b, w8
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: ret
+ %not = xor i8 %m, -1
+ %insert = insertelement <16 x i8> poison, i8 %not, i64 0
+ %shuffle = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
+ %or = or <16 x i8> %shuffle, %a
+ ret <16 x i8> %or
+}
+
+define <4 x i16> @or_dup_not_v4i16(<4 x i16> %a, i16 %m) {
+; CHECK-LABEL: or_dup_not_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.4h, w8
+; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %not = xor i16 %m, -1
+ %insert = insertelement <4 x i16> poison, i16 %not, i64 0
+ %shuffle = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
+ %or = or <4 x i16> %a, %shuffle
+ ret <4 x i16> %or
+}
+
+define <4 x i16> @or_dup_not_v4i16_swapped(<4 x i16> %a, i16 %m) {
+; CHECK-LABEL: or_dup_not_v4i16_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.4h, w8
+; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ret
+ %not = xor i16 %m, -1
+ %insert = insertelement <4 x i16> poison, i16 %not, i64 0
+ %shuffle = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
+ %or = or <4 x i16> %shuffle, %a
+ ret <4 x i16> %or
+}
+
+define <8 x i16> @or_dup_not_v8i16(<8 x i16> %a, i16 %m) {
+; CHECK-LABEL: or_dup_not_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %not = xor i16 %m, -1
+ %insert = insertelement <8 x i16> poison, i16 %not, i64 0
+ %shuffle = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+ %or = or <8 x i16> %a, %shuffle
+ ret <8 x i16> %or
+}
+
+define <8 x i16> @or_dup_not_v8i16_swapped(<8 x i16> %a, i16 %m) {
+; CHECK-LABEL: or_dup_not_v8i16_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: ret
+ %not = xor i16 %m, -1
+ %insert = insertelement <8 x i16> poison, i16 %not, i64 0
+ %shuffle = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
+ %or = or <8 x i16> %shuffle, %a
+ ret <8 x i16> %or
+}
+
+define <2 x i32> @or_dup_not_v2i32(<2 x i32> %a, i32 %m) {
+; CHECK-LABEL: or_dup_not_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.2s, w8
+; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+ %not = xor i32 %m, -1
+ %insert = insertelement <2 x i32> poison, i32 %not, i64 0
+ %shuffle = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
+ %or = or <2 x i32> %a, %shuffle
+ ret <2 x i32> %or
+}
+
+define <2 x i32> @or_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
+; CHECK-LABEL: or_dup_not_v2i32_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.2s, w8
+; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ret
+ %not = xor i32 %m, -1
+ %insert = insertelement <2 x i32> poison, i32 %not, i64 0
+ %shuffle = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
+ %or = or <2 x i32> %shuffle, %a
+ ret <2 x i32> %or
+}
+
+define <4 x i32> @or_dup_not_v4i32(<4 x i32> %a, i32 %m) {
+; CHECK-LABEL: or_dup_not_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %not = xor i32 %m, -1
+ %insert = insertelement <4 x i32> poison, i32 %not, i64 0
+ %shuffle = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
+ %or = or <4 x i32> %a, %shuffle
+ ret <4 x i32> %or
+}
+
+define <4 x i32> @or_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
+; CHECK-LABEL: or_dup_not_v4i32_swapped:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w8, w0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: ret
+ %not = xor i32 %m, -1
+ %insert = insertelement <4 x i32> poison, i32 %not, i64 0
+ %shuffle = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
+ %or = or <4 x i32> %shuffle, %a
+ ret <4 x i32> %or
+}
+
+define <2 x i64> @or_dup_not_v2i64(<2 x i64> %a, i64 %m) {
+; CHECK-LABEL: or_dup_not_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn x8, x0
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %not = xor i64 %m, -1
+ %insert = insertelement <2 x i64> poison, i64 %not, i64 0
+ %shuffle = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
+ %or = or <2 x i64> %a, %shuffle
+ ret <2 x i64> %or
+}
From 03191e24d780c9a6a4f986faa24def4edcb9a53f Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Tue, 13 Jan 2026 11:39:40 +0100
Subject: [PATCH 2/4] [AArch64] Combine (and/or X, (dup (not Y))) -> (bic/orn X, (dup Y))
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 17 +
.../AArch64/neon-bitwise-instructions.ll | 338 +++++++++++-------
2 files changed, 229 insertions(+), 126 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ace85b04595b8..8dccc892bf2dd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7856,6 +7856,23 @@ def : DUPWithTruncPat<v16i8, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
def : DUPWithTruncPat<v4i16, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
def : DUPWithTruncPat<v8i16, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
+multiclass BicOrnDupGpr<ValueType vectype, RegisterClass regtype> {
+ defvar vecreg = !cast<RegisterOperand>("V" # vectype.Size);
+ defvar bvectype = "v" # !srl(vectype.Size, 3) # "i8";
+ def : Pat<(vectype (and vecreg:$Rn, (AArch64dup (not regtype:$Rm)))),
+ (!cast<Instruction>("BIC" # bvectype) vectype:$Rn, (!cast<Instruction>("DUP" # vectype # "gpr") regtype:$Rm))>;
+ def : Pat<(vectype (or vecreg:$Rn, (AArch64dup (not regtype:$Rm)))),
+ (!cast<Instruction>("ORN" # bvectype) vectype:$Rn, (!cast<Instruction>("DUP" # vectype # "gpr") regtype:$Rm))>;
+}
+
+defm : BicOrnDupGpr<v8i8, GPR32>;
+defm : BicOrnDupGpr<v16i8, GPR32>;
+defm : BicOrnDupGpr<v4i16, GPR32>;
+defm : BicOrnDupGpr<v8i16, GPR32>;
+defm : BicOrnDupGpr<v2i32, GPR32>;
+defm : BicOrnDupGpr<v4i32, GPR32>;
+defm : BicOrnDupGpr<v2i64, GPR64>;
+
// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 3aebedd2bdb1c..576fe9d1181e4 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -2870,12 +2870,18 @@ entry:
}
define <8 x i8> @and_dup_not_v8i8(<8 x i8> %a, i8 %m) {
-; CHECK-LABEL: and_dup_not_v8i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.8b, w8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v8i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.8b, w0
+; CHECK-SD-NEXT: bic v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v8i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.8b, w8
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%not = xor i8 %m, -1
%insert = insertelement <8 x i8> poison, i8 %not, i64 0
%shuffle = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -2884,12 +2890,18 @@ define <8 x i8> @and_dup_not_v8i8(<8 x i8> %a, i8 %m) {
}
define <8 x i8> @and_dup_not_v8i8_swapped(<8 x i8> %a, i8 %m) {
-; CHECK-LABEL: and_dup_not_v8i8_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.8b, w8
-; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v8i8_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.8b, w0
+; CHECK-SD-NEXT: bic v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v8i8_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.8b, w8
+; CHECK-GI-NEXT: and v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ret
%not = xor i8 %m, -1
%insert = insertelement <8 x i8> poison, i8 %not, i64 0
%shuffle = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -2898,12 +2910,18 @@ define <8 x i8> @and_dup_not_v8i8_swapped(<8 x i8> %a, i8 %m) {
}
define <16 x i8> @and_dup_not_v16i8(<16 x i8> %a, i8 %m) {
-; CHECK-LABEL: and_dup_not_v16i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.16b, w8
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v16i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.16b, w0
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v16i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.16b, w8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%not = xor i8 %m, -1
%insert = insertelement <16 x i8> poison, i8 %not, i64 0
%shuffle = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -2912,12 +2930,18 @@ define <16 x i8> @and_dup_not_v16i8(<16 x i8> %a, i8 %m) {
}
define <16 x i8> @and_dup_not_v16i8_swapped(<16 x i8> %a, i8 %m) {
-; CHECK-LABEL: and_dup_not_v16i8_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.16b, w8
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v16i8_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.16b, w0
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v16i8_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.16b, w8
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: ret
%not = xor i8 %m, -1
%insert = insertelement <16 x i8> poison, i8 %not, i64 0
%shuffle = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -2926,12 +2950,18 @@ define <16 x i8> @and_dup_not_v16i8_swapped(<16 x i8> %a, i8 %m) {
}
define <4 x i16> @and_dup_not_v4i16(<4 x i16> %a, i16 %m) {
-; CHECK-LABEL: and_dup_not_v4i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.4h, w8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4h, w0
+; CHECK-SD-NEXT: bic v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.4h, w8
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%not = xor i16 %m, -1
%insert = insertelement <4 x i16> poison, i16 %not, i64 0
%shuffle = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -2940,12 +2970,18 @@ define <4 x i16> @and_dup_not_v4i16(<4 x i16> %a, i16 %m) {
}
define <4 x i16> @and_dup_not_v4i16_swapped(<4 x i16> %a, i16 %m) {
-; CHECK-LABEL: and_dup_not_v4i16_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.4h, w8
-; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v4i16_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4h, w0
+; CHECK-SD-NEXT: bic v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v4i16_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.4h, w8
+; CHECK-GI-NEXT: and v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ret
%not = xor i16 %m, -1
%insert = insertelement <4 x i16> poison, i16 %not, i64 0
%shuffle = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -2954,12 +2990,18 @@ define <4 x i16> @and_dup_not_v4i16_swapped(<4 x i16> %a, i16 %m) {
}
define <8 x i16> @and_dup_not_v8i16(<8 x i16> %a, i16 %m) {
-; CHECK-LABEL: and_dup_not_v8i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.8h, w8
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v8i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.8h, w0
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v8i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.8h, w8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%not = xor i16 %m, -1
%insert = insertelement <8 x i16> poison, i16 %not, i64 0
%shuffle = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -2968,12 +3010,18 @@ define <8 x i16> @and_dup_not_v8i16(<8 x i16> %a, i16 %m) {
}
define <8 x i16> @and_dup_not_v8i16_swapped(<8 x i16> %a, i16 %m) {
-; CHECK-LABEL: and_dup_not_v8i16_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.8h, w8
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v8i16_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.8h, w0
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v8i16_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.8h, w8
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: ret
%not = xor i16 %m, -1
%insert = insertelement <8 x i16> poison, i16 %not, i64 0
%shuffle = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -2984,9 +3032,8 @@ define <8 x i16> @and_dup_not_v8i16_swapped(<8 x i16> %a, i16 %m) {
define <2 x i32> @and_dup_not_v2i32(<2 x i32> %a, i32 %m) {
; CHECK-LABEL: and_dup_not_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.2s, w8
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: dup v1.2s, w0
+; CHECK-NEXT: bic v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <2 x i32> poison, i32 %not, i64 0
@@ -2998,9 +3045,8 @@ define <2 x i32> @and_dup_not_v2i32(<2 x i32> %a, i32 %m) {
define <2 x i32> @and_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
; CHECK-LABEL: and_dup_not_v2i32_swapped:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.2s, w8
-; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: dup v1.2s, w0
+; CHECK-NEXT: bic v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <2 x i32> poison, i32 %not, i64 0
@@ -3012,9 +3058,8 @@ define <2 x i32> @and_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
define <4 x i32> @and_dup_not_v4i32(<4 x i32> %a, i32 %m) {
; CHECK-LABEL: and_dup_not_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.4s, w8
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: dup v1.4s, w0
+; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <4 x i32> poison, i32 %not, i64 0
@@ -3026,9 +3071,8 @@ define <4 x i32> @and_dup_not_v4i32(<4 x i32> %a, i32 %m) {
define <4 x i32> @and_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
; CHECK-LABEL: and_dup_not_v4i32_swapped:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.4s, w8
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: dup v1.4s, w0
+; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <4 x i32> poison, i32 %not, i64 0
@@ -3040,9 +3084,8 @@ define <4 x i32> @and_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
define <2 x i64> @and_dup_not_v2i64(<2 x i64> %a, i64 %m) {
; CHECK-LABEL: and_dup_not_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x0
-; CHECK-NEXT: dup v1.2d, x8
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: dup v1.2d, x0
+; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%not = xor i64 %m, -1
%insert = insertelement <2 x i64> poison, i64 %not, i64 0
@@ -3052,12 +3095,18 @@ define <2 x i64> @and_dup_not_v2i64(<2 x i64> %a, i64 %m) {
}
define <8 x i8> @or_dup_not_v8i8(<8 x i8> %a, i8 %m) {
-; CHECK-LABEL: or_dup_not_v8i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.8b, w8
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v8i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.8b, w0
+; CHECK-SD-NEXT: orn v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v8i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.8b, w8
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%not = xor i8 %m, -1
%insert = insertelement <8 x i8> poison, i8 %not, i64 0
%shuffle = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -3066,12 +3115,18 @@ define <8 x i8> @or_dup_not_v8i8(<8 x i8> %a, i8 %m) {
}
define <8 x i8> @or_dup_not_v8i8_swapped(<8 x i8> %a, i8 %m) {
-; CHECK-LABEL: or_dup_not_v8i8_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.8b, w8
-; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v8i8_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.8b, w0
+; CHECK-SD-NEXT: orn v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v8i8_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.8b, w8
+; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ret
%not = xor i8 %m, -1
%insert = insertelement <8 x i8> poison, i8 %not, i64 0
%shuffle = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -3080,12 +3135,18 @@ define <8 x i8> @or_dup_not_v8i8_swapped(<8 x i8> %a, i8 %m) {
}
define <16 x i8> @or_dup_not_v16i8(<16 x i8> %a, i8 %m) {
-; CHECK-LABEL: or_dup_not_v16i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.16b, w8
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v16i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.16b, w0
+; CHECK-SD-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v16i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.16b, w8
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%not = xor i8 %m, -1
%insert = insertelement <16 x i8> poison, i8 %not, i64 0
%shuffle = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -3094,12 +3155,18 @@ define <16 x i8> @or_dup_not_v16i8(<16 x i8> %a, i8 %m) {
}
define <16 x i8> @or_dup_not_v16i8_swapped(<16 x i8> %a, i8 %m) {
-; CHECK-LABEL: or_dup_not_v16i8_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.16b, w8
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v16i8_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.16b, w0
+; CHECK-SD-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v16i8_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.16b, w8
+; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: ret
%not = xor i8 %m, -1
%insert = insertelement <16 x i8> poison, i8 %not, i64 0
%shuffle = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -3108,12 +3175,18 @@ define <16 x i8> @or_dup_not_v16i8_swapped(<16 x i8> %a, i8 %m) {
}
define <4 x i16> @or_dup_not_v4i16(<4 x i16> %a, i16 %m) {
-; CHECK-LABEL: or_dup_not_v4i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.4h, w8
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4h, w0
+; CHECK-SD-NEXT: orn v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.4h, w8
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%not = xor i16 %m, -1
%insert = insertelement <4 x i16> poison, i16 %not, i64 0
%shuffle = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -3122,12 +3195,18 @@ define <4 x i16> @or_dup_not_v4i16(<4 x i16> %a, i16 %m) {
}
define <4 x i16> @or_dup_not_v4i16_swapped(<4 x i16> %a, i16 %m) {
-; CHECK-LABEL: or_dup_not_v4i16_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.4h, w8
-; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v4i16_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4h, w0
+; CHECK-SD-NEXT: orn v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v4i16_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.4h, w8
+; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ret
%not = xor i16 %m, -1
%insert = insertelement <4 x i16> poison, i16 %not, i64 0
%shuffle = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -3136,12 +3215,18 @@ define <4 x i16> @or_dup_not_v4i16_swapped(<4 x i16> %a, i16 %m) {
}
define <8 x i16> @or_dup_not_v8i16(<8 x i16> %a, i16 %m) {
-; CHECK-LABEL: or_dup_not_v8i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.8h, w8
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v8i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.8h, w0
+; CHECK-SD-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v8i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.8h, w8
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%not = xor i16 %m, -1
%insert = insertelement <8 x i16> poison, i16 %not, i64 0
%shuffle = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -3150,12 +3235,18 @@ define <8 x i16> @or_dup_not_v8i16(<8 x i16> %a, i16 %m) {
}
define <8 x i16> @or_dup_not_v8i16_swapped(<8 x i16> %a, i16 %m) {
-; CHECK-LABEL: or_dup_not_v8i16_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.8h, w8
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v8i16_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.8h, w0
+; CHECK-SD-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v8i16_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.8h, w8
+; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: ret
%not = xor i16 %m, -1
%insert = insertelement <8 x i16> poison, i16 %not, i64 0
%shuffle = shufflevector <8 x i16> %insert, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -3166,9 +3257,8 @@ define <8 x i16> @or_dup_not_v8i16_swapped(<8 x i16> %a, i16 %m) {
define <2 x i32> @or_dup_not_v2i32(<2 x i32> %a, i32 %m) {
; CHECK-LABEL: or_dup_not_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.2s, w8
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: dup v1.2s, w0
+; CHECK-NEXT: orn v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <2 x i32> poison, i32 %not, i64 0
@@ -3180,9 +3270,8 @@ define <2 x i32> @or_dup_not_v2i32(<2 x i32> %a, i32 %m) {
define <2 x i32> @or_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
; CHECK-LABEL: or_dup_not_v2i32_swapped:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.2s, w8
-; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: dup v1.2s, w0
+; CHECK-NEXT: orn v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <2 x i32> poison, i32 %not, i64 0
@@ -3194,9 +3283,8 @@ define <2 x i32> @or_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
define <4 x i32> @or_dup_not_v4i32(<4 x i32> %a, i32 %m) {
; CHECK-LABEL: or_dup_not_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.4s, w8
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: dup v1.4s, w0
+; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <4 x i32> poison, i32 %not, i64 0
@@ -3208,9 +3296,8 @@ define <4 x i32> @or_dup_not_v4i32(<4 x i32> %a, i32 %m) {
define <4 x i32> @or_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
; CHECK-LABEL: or_dup_not_v4i32_swapped:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w0
-; CHECK-NEXT: dup v1.4s, w8
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: dup v1.4s, w0
+; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <4 x i32> poison, i32 %not, i64 0
@@ -3222,9 +3309,8 @@ define <4 x i32> @or_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
define <2 x i64> @or_dup_not_v2i64(<2 x i64> %a, i64 %m) {
; CHECK-LABEL: or_dup_not_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x0
-; CHECK-NEXT: dup v1.2d, x8
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: dup v1.2d, x0
+; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%not = xor i64 %m, -1
%insert = insertelement <2 x i64> poison, i64 %not, i64 0
>From e317495ea9bb9b5df8115ce32b2ddf7e88da551b Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Fri, 27 Feb 2026 14:45:45 +0100
Subject: [PATCH 3/4] [AArch64] Move the transform to AArch64DAGToDAGISel
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 26 ++++++++++++++++++-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 17 ------------
2 files changed, 25 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 67f4e127b0c87..cedbc2b46dfd1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
@@ -432,6 +433,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
const SDValue &OldBase, const SDValue &OldOffset,
unsigned Scale);
+ bool tryBicOrnDup(SDNode *N);
bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);
bool tryBitfieldInsertOp(SDNode *N);
@@ -2815,6 +2817,23 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
return true;
}
+bool AArch64DAGToDAGISel::tryBicOrnDup(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ assert(Opc == ISD::AND || Opc == ISD::OR);
+ using namespace SDPatternMatch;
+ SDValue X, Y;
+ if (!sd_match(N, m_c_BinOp(Opc, m_Node(AArch64ISD::DUP, m_Not(m_Value(X))),
+ m_Value(Y))))
+ return false;
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue Dup = CurDAG->getNode(AArch64ISD::DUP, DL, VT, X);
+ SDValue Ops[] = {CurDAG->getNOT(DL, Dup, VT), Y};
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+}
+
bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
assert(N->getOpcode() == ISD::SIGN_EXTEND);
@@ -4870,8 +4889,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
}
- case ISD::SRL:
case ISD::AND:
+ if (tryBicOrnDup(Node))
+ return;
+ [[fallthrough]];
+ case ISD::SRL:
case ISD::SRA:
case ISD::SIGN_EXTEND_INREG:
if (tryBitfieldExtractOp(Node))
@@ -4891,6 +4913,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
case ISD::OR:
+ if (tryBicOrnDup(Node))
+ return;
if (tryBitfieldInsertOp(Node))
return;
if (trySelectXAR(Node))
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 8dccc892bf2dd..ace85b04595b8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7856,23 +7856,6 @@ def : DUPWithTruncPat<v16i8, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
def : DUPWithTruncPat<v4i16, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
def : DUPWithTruncPat<v8i16, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
-multiclass BicOrnDupGpr<ValueType vectype, RegisterClass regtype> {
- defvar vecreg = !cast<RegisterOperand>("V" # vectype.Size);
- defvar bvectype = "v" # !srl(vectype.Size, 3) # "i8";
- def : Pat<(vectype (and vecreg:$Rn, (AArch64dup (not regtype:$Rm)))),
- (!cast<Instruction>("BIC" # bvectype) vectype:$Rn, (!cast<Instruction>("DUP" # vectype # "gpr") regtype:$Rm))>;
- def : Pat<(vectype (or vecreg:$Rn, (AArch64dup (not regtype:$Rm)))),
- (!cast<Instruction>("ORN" # bvectype) vectype:$Rn, (!cast<Instruction>("DUP" # vectype # "gpr") regtype:$Rm))>;
-}
-
-defm : BicOrnDupGpr<v8i8, GPR32>;
-defm : BicOrnDupGpr<v16i8, GPR32>;
-defm : BicOrnDupGpr<v4i16, GPR32>;
-defm : BicOrnDupGpr<v8i16, GPR32>;
-defm : BicOrnDupGpr<v2i32, GPR32>;
-defm : BicOrnDupGpr<v4i32, GPR32>;
-defm : BicOrnDupGpr<v2i64, GPR64>;
-
// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;
>From 415c522df9600d78fb0d53dcc59921c93d7aeccd Mon Sep 17 00:00:00 2001
From: Piotr Fusik <p.fusik at samsung.com>
Date: Mon, 2 Mar 2026 09:05:26 +0100
Subject: [PATCH 4/4] [AArch64] Move the transform to AArch64ISelLowering
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 26 +--
.../Target/AArch64/AArch64ISelLowering.cpp | 25 +++
.../AArch64/neon-bitwise-instructions.ll | 170 ++++++++++++------
3 files changed, 146 insertions(+), 75 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index cedbc2b46dfd1..67f4e127b0c87 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -15,7 +15,6 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
@@ -433,7 +432,6 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
const SDValue &OldBase, const SDValue &OldOffset,
unsigned Scale);
- bool tryBicOrnDup(SDNode *N);
bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);
bool tryBitfieldInsertOp(SDNode *N);
@@ -2817,23 +2815,6 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
return true;
}
-bool AArch64DAGToDAGISel::tryBicOrnDup(SDNode *N) {
- unsigned Opc = N->getOpcode();
- assert(Opc == ISD::AND || Opc == ISD::OR);
- using namespace SDPatternMatch;
- SDValue X, Y;
- if (!sd_match(N, m_c_BinOp(Opc, m_Node(AArch64ISD::DUP, m_Not(m_Value(X))),
- m_Value(Y))))
- return false;
-
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
- SDValue Dup = CurDAG->getNode(AArch64ISD::DUP, DL, VT, X);
- SDValue Ops[] = {CurDAG->getNOT(DL, Dup, VT), Y};
- CurDAG->SelectNodeTo(N, Opc, VT, Ops);
- return true;
-}
-
bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
assert(N->getOpcode() == ISD::SIGN_EXTEND);
@@ -4889,11 +4870,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
}
- case ISD::AND:
- if (tryBicOrnDup(Node))
- return;
- [[fallthrough]];
case ISD::SRL:
+ case ISD::AND:
case ISD::SRA:
case ISD::SIGN_EXTEND_INREG:
if (tryBitfieldExtractOp(Node))
@@ -4913,8 +4891,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
case ISD::OR:
- if (tryBicOrnDup(Node))
- return;
if (tryBitfieldInsertOp(Node))
return;
if (trySelectXAR(Node))
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index eb6e9146e3839..dc5756f950cc9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21024,6 +21024,25 @@ static SDValue tryCombineToREV(SDNode *N, SelectionDAG &DAG,
DAG.getNode(RevOp, DL, HalfVT, N0->getOperand(0)));
}
+// (and/or X, (splat (not Y))) -> (and/or X, (not (splat Y)))
+// so that it gets selected as (bic/orn X, (dup Y))
+static SDValue performANDORDUPNOTCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opc = N->getOpcode();
+ assert(Opc == ISD::AND || Opc == ISD::OR);
+ using namespace llvm::SDPatternMatch;
+ SDValue X, Y;
+ if (!sd_match(N, m_c_BinOp(Opc, m_Value(X),
+ m_Shuffle(m_InsertElt(m_Poison(),
+ m_Not(m_Value(Y)), m_Zero()),
+ m_Poison()))))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue Not = DAG.getNOT(DL, DAG.getSplat(VT, DL, Y), VT);
+ return DAG.getNode(Opc, DL, VT, X, Not);
+}
+
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget,
const AArch64TargetLowering &TLI) {
@@ -21035,6 +21054,9 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
if (SDValue R = tryCombineToREV(N, DAG, DCI))
return R;
+ if (SDValue R = performANDORDUPNOTCombine(N, DAG))
+ return R;
+
return SDValue();
}
@@ -21241,6 +21263,9 @@ static SDValue performANDCombine(SDNode *N,
if (SDValue R = performANDSETCCCombine(N,DCI))
return R;
+ if (SDValue R = performANDORDUPNOTCombine(N, DAG))
+ return R;
+
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 576fe9d1181e4..ac7c6c00e0533 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -3030,11 +3030,18 @@ define <8 x i16> @and_dup_not_v8i16_swapped(<8 x i16> %a, i16 %m) {
}
define <2 x i32> @and_dup_not_v2i32(<2 x i32> %a, i32 %m) {
-; CHECK-LABEL: and_dup_not_v2i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.2s, w0
-; CHECK-NEXT: bic v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v2i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.2s, w0
+; CHECK-SD-NEXT: bic v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v2i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.2s, w8
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <2 x i32> poison, i32 %not, i64 0
%shuffle = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
@@ -3043,11 +3050,18 @@ define <2 x i32> @and_dup_not_v2i32(<2 x i32> %a, i32 %m) {
}
define <2 x i32> @and_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
-; CHECK-LABEL: and_dup_not_v2i32_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.2s, w0
-; CHECK-NEXT: bic v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v2i32_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.2s, w0
+; CHECK-SD-NEXT: bic v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v2i32_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.2s, w8
+; CHECK-GI-NEXT: and v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <2 x i32> poison, i32 %not, i64 0
%shuffle = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
@@ -3056,11 +3070,18 @@ define <2 x i32> @and_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
}
define <4 x i32> @and_dup_not_v4i32(<4 x i32> %a, i32 %m) {
-; CHECK-LABEL: and_dup_not_v4i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.4s, w0
-; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4s, w0
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.4s, w8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <4 x i32> poison, i32 %not, i64 0
%shuffle = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -3069,11 +3090,18 @@ define <4 x i32> @and_dup_not_v4i32(<4 x i32> %a, i32 %m) {
}
define <4 x i32> @and_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
-; CHECK-LABEL: and_dup_not_v4i32_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.4s, w0
-; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v4i32_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4s, w0
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v4i32_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.4s, w8
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <4 x i32> poison, i32 %not, i64 0
%shuffle = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -3082,11 +3110,18 @@ define <4 x i32> @and_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
}
define <2 x i64> @and_dup_not_v2i64(<2 x i64> %a, i64 %m) {
-; CHECK-LABEL: and_dup_not_v2i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.2d, x0
-; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: and_dup_not_v2i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.2d, x0
+; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_dup_not_v2i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn x8, x0
+; CHECK-GI-NEXT: dup v1.2d, x8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%not = xor i64 %m, -1
%insert = insertelement <2 x i64> poison, i64 %not, i64 0
%shuffle = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
@@ -3255,11 +3290,18 @@ define <8 x i16> @or_dup_not_v8i16_swapped(<8 x i16> %a, i16 %m) {
}
define <2 x i32> @or_dup_not_v2i32(<2 x i32> %a, i32 %m) {
-; CHECK-LABEL: or_dup_not_v2i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.2s, w0
-; CHECK-NEXT: orn v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v2i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.2s, w0
+; CHECK-SD-NEXT: orn v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v2i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.2s, w8
+; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <2 x i32> poison, i32 %not, i64 0
%shuffle = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
@@ -3268,11 +3310,18 @@ define <2 x i32> @or_dup_not_v2i32(<2 x i32> %a, i32 %m) {
}
define <2 x i32> @or_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
-; CHECK-LABEL: or_dup_not_v2i32_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.2s, w0
-; CHECK-NEXT: orn v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v2i32_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.2s, w0
+; CHECK-SD-NEXT: orn v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v2i32_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.2s, w8
+; CHECK-GI-NEXT: orr v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <2 x i32> poison, i32 %not, i64 0
%shuffle = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
@@ -3281,11 +3330,18 @@ define <2 x i32> @or_dup_not_v2i32_swapped(<2 x i32> %a, i32 %m) {
}
define <4 x i32> @or_dup_not_v4i32(<4 x i32> %a, i32 %m) {
-; CHECK-LABEL: or_dup_not_v4i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.4s, w0
-; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4s, w0
+; CHECK-SD-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.4s, w8
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <4 x i32> poison, i32 %not, i64 0
%shuffle = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -3294,11 +3350,18 @@ define <4 x i32> @or_dup_not_v4i32(<4 x i32> %a, i32 %m) {
}
define <4 x i32> @or_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
-; CHECK-LABEL: or_dup_not_v4i32_swapped:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.4s, w0
-; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v4i32_swapped:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.4s, w0
+; CHECK-SD-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v4i32_swapped:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn w8, w0
+; CHECK-GI-NEXT: dup v1.4s, w8
+; CHECK-GI-NEXT: orr v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT: ret
%not = xor i32 %m, -1
%insert = insertelement <4 x i32> poison, i32 %not, i64 0
%shuffle = shufflevector <4 x i32> %insert, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -3307,11 +3370,18 @@ define <4 x i32> @or_dup_not_v4i32_swapped(<4 x i32> %a, i32 %m) {
}
define <2 x i64> @or_dup_not_v2i64(<2 x i64> %a, i64 %m) {
-; CHECK-LABEL: or_dup_not_v2i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.2d, x0
-; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: or_dup_not_v2i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.2d, x0
+; CHECK-SD-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: or_dup_not_v2i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mvn x8, x0
+; CHECK-GI-NEXT: dup v1.2d, x8
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%not = xor i64 %m, -1
%insert = insertelement <2 x i64> poison, i64 %not, i64 0
%shuffle = shufflevector <2 x i64> %insert, <2 x i64> poison, <2 x i32> zeroinitializer
More information about the llvm-commits mailing list