[llvm] [AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15), 0x10001), 0xffff) to CMLTz (PR #92915)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 09:37:10 PDT 2024
https://github.com/chuongg3 updated https://github.com/llvm/llvm-project/pull/92915
>From a1a8f6f458e90ba375fccd8cfc2d9ac6f5565930 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 12 Apr 2024 20:17:36 +0000
Subject: [PATCH 1/2] [AArch64][GlobalISel] Pre-commit Test for Combine
MUL(AND(LSHR)) to CMLT
---
llvm/test/CodeGen/AArch64/mulcmle.ll | 137 +++++++++++++++++++++------
1 file changed, 107 insertions(+), 30 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll
index 5c216b8550080..90cfa41c1ba1a 100644
--- a/llvm/test/CodeGen/AArch64/mulcmle.ll
+++ b/llvm/test/CodeGen/AArch64/mulcmle.ll
@@ -1,11 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <1 x i64> @v1i64(<1 x i64> %a) {
-; CHECK-LABEL: v1i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v1i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.2s, v0.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v1i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: lsr x8, x8, #31
+; CHECK-GI-NEXT: and x8, x8, #0x100000001
+; CHECK-GI-NEXT: lsl x9, x8, #32
+; CHECK-GI-NEXT: sub x8, x9, x8
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
%b = lshr <1 x i64> %a, <i64 31>
%c = and <1 x i64> %b, <i64 4294967297>
%d = mul nuw <1 x i64> %c, <i64 4294967295>
@@ -13,10 +24,28 @@ define <1 x i64> @v1i64(<1 x i64> %a) {
}
define <2 x i64> @v2i64(<2 x i64> %a) {
-; CHECK-LABEL: v2i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v2i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.4s, v0.4s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4s, #1
+; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #31
+; CHECK-GI-NEXT: movi v2.2d, #0x000000ffffffff
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: mov d3, v2.d[1]
+; CHECK-GI-NEXT: fmov x9, d2
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x10, d3
+; CHECK-GI-NEXT: mul x8, x8, x9
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: mul x9, x9, x10
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: mov v0.d[1], x9
+; CHECK-GI-NEXT: ret
%b = lshr <2 x i64> %a, <i64 31, i64 31>
%c = and <2 x i64> %b, <i64 4294967297, i64 4294967297>
%d = mul nuw <2 x i64> %c, <i64 4294967295, i64 4294967295>
@@ -24,10 +53,19 @@ define <2 x i64> @v2i64(<2 x i64> %a) {
}
define <2 x i32> @v2i32(<2 x i32> %a) {
-; CHECK-LABEL: v2i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v2i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.4h, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.4h, #1
+; CHECK-GI-NEXT: ushr v0.2s, v0.2s, #15
+; CHECK-GI-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: mul v0.2s, v0.2s, v2.2s
+; CHECK-GI-NEXT: ret
%b = lshr <2 x i32> %a, <i32 15, i32 15>
%c = and <2 x i32> %b, <i32 65537, i32 65537>
%d = mul nuw <2 x i32> %c, <i32 65535, i32 65535>
@@ -35,10 +73,19 @@ define <2 x i32> @v2i32(<2 x i32> %a) {
}
define <4 x i32> @v4i32(<4 x i32> %a) {
-; CHECK-LABEL: v4i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.8h, #1
+; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #15
+; CHECK-GI-NEXT: movi v2.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: mul v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: ret
%b = lshr <4 x i32> %a, <i32 15, i32 15, i32 15, i32 15>
%c = and <4 x i32> %b, <i32 65537, i32 65537, i32 65537, i32 65537>
%d = mul nuw <4 x i32> %c, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -46,11 +93,23 @@ define <4 x i32> @v4i32(<4 x i32> %a) {
}
define <8 x i32> @v8i32(<8 x i32> %a) {
-; CHECK-LABEL: v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
-; CHECK-NEXT: cmlt v1.8h, v1.8h, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-SD-NEXT: cmlt v1.8h, v1.8h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #1
+; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #15
+; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #15
+; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: mul v0.4s, v0.4s, v3.4s
+; CHECK-GI-NEXT: mul v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: ret
%b = lshr <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
%c = and <8 x i32> %b, <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
%d = mul nuw <8 x i32> %c, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -58,10 +117,19 @@ define <8 x i32> @v8i32(<8 x i32> %a) {
}
define <4 x i16> @v4i16(<4 x i16> %a) {
-; CHECK-LABEL: v4i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.8b, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.8b, #1
+; CHECK-GI-NEXT: ushr v0.4h, v0.4h, #7
+; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: mul v0.4h, v0.4h, v2.4h
+; CHECK-GI-NEXT: ret
%b = lshr <4 x i16> %a, <i16 7, i16 7, i16 7, i16 7>
%c = and <4 x i16> %b, <i16 257, i16 257, i16 257, i16 257>
%d = mul nuw <4 x i16> %c, <i16 255, i16 255, i16 255, i16 255>
@@ -69,10 +137,19 @@ define <4 x i16> @v4i16(<4 x i16> %a) {
}
define <8 x i16> @v8i16(<8 x i16> %a) {
-; CHECK-LABEL: v8i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmlt v0.16b, v0.16b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.16b, #1
+; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #7
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: mul v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: ret
%b = lshr <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%c = and <8 x i16> %b, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
%d = mul nuw <8 x i16> %c, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
>From 2cf47f0c7e744c6f0816852149a32343ce1baa06 Mon Sep 17 00:00:00 2001
From: Tuan Chuong Goh <chuong.goh at arm.com>
Date: Fri, 12 Apr 2024 20:40:41 +0000
Subject: [PATCH 2/2] [AArch64][GlobalISel] Combine MUL(AND(LSHR(X, 15),
0x10001), 0xffff) to CMLTz
This patch mirrors the following SelectionDAG patch for GlobalISel:
https://reviews.llvm.org/D130874
---
llvm/lib/Target/AArch64/AArch64Combine.td | 11 +-
.../GISel/AArch64PostLegalizerCombiner.cpp | 55 +++++++++
llvm/test/CodeGen/AArch64/mulcmle.ll | 116 ++++--------------
3 files changed, 90 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1c7f6b870d390..1ce6cdf1c1e1e 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -265,6 +265,14 @@ def or_to_bsp: GICombineRule <
(apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
>;
+// Combines Mul(And(LShr(X, 15), 0x10001), 0xffff) into CMLTz; $matchinfo is X.
+def combine_mul_cmlt : GICombineRule<
+ (defs root:$root, register_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_MUL):$root,
+ [{ return matchCombineMulCMLT(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -296,5 +304,6 @@ def AArch64PostLegalizerCombiner
split_store_zero_128, undef_combines,
select_to_minmax, or_to_bsp, combine_concat_vector,
commute_constant_to_rhs,
- push_freeze_to_prevent_poison_from_propagating]> {
+ push_freeze_to_prevent_poison_from_propagating,
+ combine_mul_cmlt]> {
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index d8ca5494ba50a..7f3e0e01ccd25 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -381,6 +381,61 @@ void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+// Match half of the Mul(And(LShr(X, 15), 0x10001), 0xffff) -> CMLTz combine.
+bool matchCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &SrcReg) {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ // Only the five fixed vector result types listed below are accepted.
+ if (DstTy != LLT::fixed_vector(2, 64) && DstTy != LLT::fixed_vector(2, 32) &&
+ DstTy != LLT::fixed_vector(4, 32) && DstTy != LLT::fixed_vector(4, 16) &&
+ DstTy != LLT::fixed_vector(8, 16))
+ return false;
+ // Operand 1 of MI must come from a G_AND whose operand 1 comes from a G_LSHR.
+ auto AndMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ if (AndMI->getOpcode() != TargetOpcode::G_AND)
+ return false;
+ auto LShrMI = getDefIgnoringCopies(AndMI->getOperand(1).getReg(), MRI);
+ if (LShrMI->getOpcode() != TargetOpcode::G_LSHR)
+ return false;
+ // V1 (mul), V2 (and), V3 (shift): operand 2 must be a constant or constant
+ // splat, checked against isMask(H), 1 | 1 << H, H - 1 (H = half lane width).
+ auto V1 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(MI.getOperand(2).getReg()), MRI);
+ auto V2 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(AndMI->getOperand(2).getReg()), MRI);
+ auto V3 = isConstantOrConstantSplatVector(
+ *MRI.getVRegDef(LShrMI->getOperand(2).getReg()), MRI);
+ if (!V1.has_value() || !V2.has_value() || !V3.has_value())
+ return false;
+ unsigned HalfSize = DstTy.getScalarSizeInBits() / 2;
+ if (!V1.value().isMask(HalfSize) || V2.value() != (1ULL | 1ULL << HalfSize) ||
+ V3 != (HalfSize - 1))
+ return false;
+ // On success report X, the value being shifted, through the SrcReg out-param.
+ SrcReg = LShrMI->getOperand(1).getReg();
+
+ return true;
+}
+// Apply half: define DstReg as bitcast(icmp slt(bitcast(SrcReg), 0)), erase MI.
+void applyCombineMulCMLT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, Register &SrcReg) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT HalfTy =
+ DstTy.changeElementCount(DstTy.getElementCount().multiplyCoefficientBy(2))
+ .changeElementSize(DstTy.getScalarSizeInBits() / 2);
+ // HalfTy: twice as many elements as DstTy, each half as wide; compare there.
+ Register ZeroVec = B.buildConstant(HalfTy, 0).getReg(0);
+ Register CastReg =
+ B.buildInstr(TargetOpcode::G_BITCAST, {HalfTy}, {SrcReg}).getReg(0);
+ Register CMLTReg =
+ B.buildICmp(CmpInst::Predicate::ICMP_SLT, HalfTy, CastReg, ZeroVec)
+ .getReg(0);
+ // Cast the compare result back into MI's own def, then remove the G_MUL.
+ B.buildInstr(TargetOpcode::G_BITCAST, {DstReg}, {CMLTReg}).getReg(0);
+ MI.eraseFromParent();
+}
+
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
// TODO: Make CombinerHelper methods const.
diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll
index 90cfa41c1ba1a..32bc5c5e63b3e 100644
--- a/llvm/test/CodeGen/AArch64/mulcmle.ll
+++ b/llvm/test/CodeGen/AArch64/mulcmle.ll
@@ -24,28 +24,10 @@ define <1 x i64> @v1i64(<1 x i64> %a) {
}
define <2 x i64> @v2i64(<2 x i64> %a) {
-; CHECK-SD-LABEL: v2i64:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.4s, v0.4s, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v2i64:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.4s, #1
-; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #31
-; CHECK-GI-NEXT: movi v2.2d, #0x000000ffffffff
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: fmov x9, d2
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: fmov x10, d3
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: fmov x9, d1
-; CHECK-GI-NEXT: mul x9, x9, x10
-; CHECK-GI-NEXT: fmov d0, x8
-; CHECK-GI-NEXT: mov v0.d[1], x9
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
+; CHECK-NEXT: ret
%b = lshr <2 x i64> %a, <i64 31, i64 31>
%c = and <2 x i64> %b, <i64 4294967297, i64 4294967297>
%d = mul nuw <2 x i64> %c, <i64 4294967295, i64 4294967295>
@@ -53,19 +35,10 @@ define <2 x i64> @v2i64(<2 x i64> %a) {
}
define <2 x i32> @v2i32(<2 x i32> %a) {
-; CHECK-SD-LABEL: v2i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v2i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.4h, #1
-; CHECK-GI-NEXT: ushr v0.2s, v0.2s, #15
-; CHECK-GI-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mul v0.2s, v0.2s, v2.2s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
+; CHECK-NEXT: ret
%b = lshr <2 x i32> %a, <i32 15, i32 15>
%c = and <2 x i32> %b, <i32 65537, i32 65537>
%d = mul nuw <2 x i32> %c, <i32 65535, i32 65535>
@@ -73,19 +46,10 @@ define <2 x i32> @v2i32(<2 x i32> %a) {
}
define <4 x i32> @v4i32(<4 x i32> %a) {
-; CHECK-SD-LABEL: v4i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v4i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.8h, #1
-; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #15
-; CHECK-GI-NEXT: movi v2.2d, #0x00ffff0000ffff
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: mul v0.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-NEXT: ret
%b = lshr <4 x i32> %a, <i32 15, i32 15, i32 15, i32 15>
%c = and <4 x i32> %b, <i32 65537, i32 65537, i32 65537, i32 65537>
%d = mul nuw <4 x i32> %c, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -93,23 +57,11 @@ define <4 x i32> @v4i32(<4 x i32> %a) {
}
define <8 x i32> @v8i32(<8 x i32> %a) {
-; CHECK-SD-LABEL: v8i32:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8h, v0.8h, #0
-; CHECK-SD-NEXT: cmlt v1.8h, v1.8h, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8i32:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v2.8h, #1
-; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #15
-; CHECK-GI-NEXT: ushr v1.4s, v1.4s, #15
-; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: mul v0.4s, v0.4s, v3.4s
-; CHECK-GI-NEXT: mul v1.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
+; CHECK-NEXT: cmlt v1.8h, v1.8h, #0
+; CHECK-NEXT: ret
%b = lshr <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
%c = and <8 x i32> %b, <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
%d = mul nuw <8 x i32> %c, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -117,19 +69,10 @@ define <8 x i32> @v8i32(<8 x i32> %a) {
}
define <4 x i16> @v4i16(<4 x i16> %a) {
-; CHECK-SD-LABEL: v4i16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v4i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.8b, #1
-; CHECK-GI-NEXT: ushr v0.4h, v0.4h, #7
-; CHECK-GI-NEXT: movi d2, #0xff00ff00ff00ff
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mul v0.4h, v0.4h, v2.4h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
+; CHECK-NEXT: ret
%b = lshr <4 x i16> %a, <i16 7, i16 7, i16 7, i16 7>
%c = and <4 x i16> %b, <i16 257, i16 257, i16 257, i16 257>
%d = mul nuw <4 x i16> %c, <i16 255, i16 255, i16 255, i16 255>
@@ -137,19 +80,10 @@ define <4 x i16> @v4i16(<4 x i16> %a) {
}
define <8 x i16> @v8i16(<8 x i16> %a) {
-; CHECK-SD-LABEL: v8i16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: v8i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi v1.16b, #1
-; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #7
-; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: mul v0.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
+; CHECK-NEXT: ret
%b = lshr <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
%c = and <8 x i16> %b, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
%d = mul nuw <8 x i16> %c, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
More information about the llvm-commits
mailing list