[llvm] 963c0a0 - [AArch64] Look through bitcast when looking for extract_high subvector
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 12 02:59:14 PDT 2022
Author: David Green
Date: 2022-06-12T10:59:09+01:00
New Revision: 963c0a014700ab07bd7c81ffd1789425a297f0fb
URL: https://github.com/llvm/llvm-project/commit/963c0a014700ab07bd7c81ffd1789425a297f0fb
DIFF: https://github.com/llvm/llvm-project/commit/963c0a014700ab07bd7c81ffd1789425a297f0fb.diff
LOG: [AArch64] Look through bitcast when looking for extract_high subvector
Since D61806, DAGCombiner has folded subvector_extract(bitcast(..)) to
bitcast(subvector_extract(..)), which would place a bitcast between a
subvector_extract and the operation that could be converted to a high
neon instruction (like smull2). This adds better matching for the
subvector_extract, through the tablegen extract_high PatFrags to
optionally skip the bitcast under little endian, still matching an
extract of the high half of the input vector.
I didn't update the extract_high of duplicate patterns, as the
ComplexPattern needs named operands. I did add an extract_high_dup_v8i16
PatFrag to abstract away the common code, which can be extended in a
future patch.
Differential Revision: https://reviews.llvm.org/D126782
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/highextractbitcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index b1f6d1d81275..d6d6a18cbdec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -159,6 +159,22 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
}
+ bool SelectExtractHigh(SDValue N, SDValue &Res) {
+ if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+ if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ !isa<ConstantSDNode>(N->getOperand(1)))
+ return false;
+ EVT VT = N->getValueType(0);
+ EVT LVT = N->getOperand(0).getValueType();
+ unsigned Index = N->getConstantOperandVal(1);
+ if (!VT.is64BitVector() || !LVT.is128BitVector() ||
+ Index != VT.getVectorNumElements())
+ return false;
+ Res = N->getOperand(0);
+ return true;
+ }
+
bool SelectDupZeroOrUndef(SDValue N) {
switch(N->getOpcode()) {
case ISD::UNDEF:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f7c2f30d1183..78bc1b8c6f02 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -109,15 +109,19 @@ class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>;
-// Helper fragment for an extract of the high portion of a 128-bit vector.
+// Helper fragment for an extract of the high portion of a 128-bit vector. The
+// ComplexPattern match both extract_subvector and bitcast(extract_subvector(..)).
def extract_high_v16i8 :
- UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>;
+ ComplexPattern<v8i8, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v8i16 :
- UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>;
+ ComplexPattern<v4i16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v4i32 :
- UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>;
-def extract_high_v2i64 :
- UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>;
+ ComplexPattern<v2i32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+
+def extract_high_dup_v8i16 :
+ BinOpFrag<(extract_subvector (v8i16 (AArch64duplane16 (v8i16 node:$LHS), node:$RHS)), (i64 4))>;
+def extract_high_dup_v4i32 :
+ BinOpFrag<(extract_subvector (v4i32 (AArch64duplane32 (v4i32 node:$LHS), node:$RHS)), (i64 2))>;
//===----------------------------------------------------------------------===//
// Asm Operand Classes.
@@ -6509,8 +6513,8 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
asm#"2", ".1q", ".2d", ".2d", []>;
}
- def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)),
- (v8i8 (extract_high_v16i8 V128:$Rm)))),
+ def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
+ (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm))))),
(!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
}
@@ -6523,8 +6527,8 @@ multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6532,8 +6536,8 @@ multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
@@ -6547,8 +6551,8 @@ multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))))]>;
+ (zext (v8i8 (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
@@ -6558,8 +6562,8 @@ multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))))]>;
+ (zext (v4i16 (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6569,8 +6573,8 @@ multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))))]>;
+ (zext (v2i32 (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))))]>;
}
multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
@@ -6587,8 +6591,8 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$dst),
(add (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))))))]>;
+ (zext (v8i8 (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm)))))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
@@ -6600,8 +6604,8 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(add (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))))))]>;
+ (zext (v4i16 (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm)))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6613,8 +6617,8 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(add (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))))))]>;
+ (zext (v2i32 (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm)))))))]>;
}
multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
@@ -6626,8 +6630,8 @@ multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
V128, V128, V128,
asm#"2", ".8h", ".16b", ".16b",
- [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
@@ -6635,8 +6639,8 @@ multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
V128, V128, V128,
asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6644,8 +6648,8 @@ multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
V128, V128, V128,
asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
@@ -6661,8 +6665,8 @@ multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
asm#"2", ".8h", ".16b", ".16b",
[(set (v8i16 V128:$dst),
(OpNode (v8i16 V128:$Rd),
- (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ (extract_high_v16i8 (v16i8 V128:$Rn)),
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
V128, V64, V64,
asm, ".4s", ".4h", ".4h",
@@ -6673,8 +6677,8 @@ multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd),
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6685,8 +6689,8 @@ multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(OpNode (v2i64 V128:$Rd),
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
@@ -6703,8 +6707,8 @@ multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
asm#"2", ".4s", ".8h", ".8h",
[(set (v4i32 V128:$dst),
(Accum (v4i32 V128:$Rd),
- (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))))]>;
+ (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_v8i16 (v8i16 V128:$Rm))))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
V128, V64, V64,
asm, ".2d", ".2s", ".2s",
@@ -6717,8 +6721,8 @@ multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
asm#"2", ".2d", ".4s", ".4s",
[(set (v2i64 V128:$dst),
(Accum (v2i64 V128:$Rd),
- (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))))]>;
+ (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_v4i32 (v4i32 V128:$Rm))))))]>;
}
multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
@@ -6731,7 +6735,7 @@ multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".8h", ".8h", ".16b",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
+ (extract_high_v16i8 (v16i8 V128:$Rm))))]>;
def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
V128, V128, V64,
asm, ".4s", ".4s", ".4h",
@@ -6740,7 +6744,7 @@ multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".4s", ".4s", ".8h",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
+ (extract_high_v8i16 (v8i16 V128:$Rm))))]>;
def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
V128, V128, V64,
asm, ".2d", ".2d", ".2s",
@@ -6749,7 +6753,7 @@ multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
V128, V128, V128,
asm#"2", ".2d", ".2d", ".4s",
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
+ (extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
//----------------------------------------------------------------------------
@@ -8768,9 +8772,8 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
@@ -8795,9 +8798,8 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8860,10 +8862,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
[(set (v4i32 V128:$dst),
(Accum (v4i32 V128:$Rd),
(v4i32 (int_aarch64_neon_sqdmull
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16
- (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))))]> {
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -8892,10 +8892,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
[(set (v2i64 V128:$dst),
(Accum (v2i64 V128:$Rd),
(v2i64 (int_aarch64_neon_sqdmull
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32
- (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8948,9 +8946,8 @@ multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
@@ -8975,9 +8972,8 @@ multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -9007,9 +9003,8 @@ multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd),
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
+ (extract_high_v8i16 (v8i16 V128:$Rn)),
+ (extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -9034,9 +9029,8 @@ multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$dst),
(OpNode (v2i64 V128:$Rd),
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
+ (extract_high_v4i32 (v4i32 V128:$Rn)),
+ (extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -9721,7 +9715,7 @@ multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
V128, V128, vecshiftL8,
asm#"2", ".8h", ".16b",
[(set (v8i16 V128:$Rd),
- (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> {
+ (OpNode (extract_high_v16i8 (v16i8 V128:$Rn)), vecshiftL8:$imm))]> {
bits<3> imm;
let Inst{18-16} = imm;
}
@@ -9737,7 +9731,7 @@ multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
V128, V128, vecshiftL16,
asm#"2", ".4s", ".8h",
[(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> {
+ (OpNode (extract_high_v8i16 (v8i16 V128:$Rn)), vecshiftL16:$imm))]> {
bits<4> imm;
let Inst{19-16} = imm;
@@ -9754,7 +9748,7 @@ multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
V128, V128, vecshiftL32,
asm#"2", ".2d", ".4s",
[(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> {
+ (OpNode (extract_high_v4i32 (v4i32 V128:$Rn)), vecshiftL32:$imm))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 65ca797eee28..7f32465a9404 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4250,25 +4250,25 @@ def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
(zext (v8i8 V64:$opB))),
(AArch64vashr v8i16:$src, (i32 15))))),
(UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
-def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
- (zext (extract_high_v16i8 V128:$opB))))),
+def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
+ (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
(UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
- (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
- (zext (extract_high_v16i8 V128:$opB))),
+ (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
+ (zext (extract_high_v16i8 (v16i8 V128:$opB)))),
(AArch64vashr v8i16:$src, (i32 15))))),
(UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
(zext (v4i16 V64:$opB))))),
(UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
-def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
- (zext (extract_high_v8i16 V128:$opB))))),
+def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
+ (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
(UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
(zext (v2i32 V64:$opB))))),
(UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
-def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
- (zext (extract_high_v4i32 V128:$opB))))),
+def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
+ (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
(UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
@@ -4439,15 +4439,15 @@ def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
(SHLLv8i8 V64:$Rn)>;
- def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
+ def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
(SHLLv16i8 V128:$Rn)>;
def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
(SHLLv4i16 V64:$Rn)>;
- def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
+ def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
(SHLLv8i16 V128:$Rn)>;
def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
(SHLLv2i32 V64:$Rn)>;
- def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
+ def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
(SHLLv4i32 V128:$Rn)>;
}
diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll
index 5c3c37f47fee..89d3d2187b40 100644
--- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll
+++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll
@@ -37,9 +37,7 @@ entry:
define <4 x i32> @test_smull_high_s16_bitcasta1(<2 x i64> %aa, <8 x i16> %b) #0 {
; CHECK-LE-LABEL: test_smull_high_s16_bitcasta1:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcasta1:
@@ -63,9 +61,7 @@ entry:
define <4 x i32> @test_smull_high_s16_bitcastb1(<8 x i16> %a, <16 x i8> %bb) #0 {
; CHECK-LE-LABEL: test_smull_high_s16_bitcastb1:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcastb1:
@@ -89,9 +85,7 @@ entry:
define <4 x i32> @test_smull_high_s16_bitcasta2(<2 x i64> %a, <8 x i16> %b) #0 {
; CHECK-LE-LABEL: test_smull_high_s16_bitcasta2:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcasta2:
@@ -117,9 +111,7 @@ entry:
define <4 x i32> @test_smull_high_s16_bitcastb2(<8 x i16> %a, <16 x i8> %b) #0 {
; CHECK-LE-LABEL: test_smull_high_s16_bitcastb2:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcastb2:
@@ -374,9 +366,7 @@ entry:
define <4 x i32> @test_umull_high_s16_bitcasta1(<2 x i64> %aa, <8 x i16> %b) #0 {
; CHECK-LE-LABEL: test_umull_high_s16_bitcasta1:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: umull v0.4s, v0.4h, v1.4h
+; CHECK-LE-NEXT: umull2 v0.4s, v0.8h, v1.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_umull_high_s16_bitcasta1:
@@ -400,9 +390,7 @@ entry:
define <8 x i16> @test_vabdl_high_u82(<16 x i8> %a, <8 x i16> %bb) {
; CHECK-LE-LABEL: test_vabdl_high_u82:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: uabdl v0.8h, v0.8b, v1.8b
+; CHECK-LE-NEXT: uabdl2 v0.8h, v0.16b, v1.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_vabdl_high_u82:
@@ -427,9 +415,7 @@ entry:
define <8 x i16> @test_vabdl_high_s82(<16 x i8> %a, <8 x i16> %bb) {
; CHECK-LE-LABEL: test_vabdl_high_s82:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: sabdl v0.8h, v0.8b, v1.8b
+; CHECK-LE-NEXT: sabdl2 v0.8h, v0.16b, v1.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_vabdl_high_s82:
@@ -454,9 +440,7 @@ entry:
define <4 x i32> @test_vqdmlal_high_s16_bitcast(<4 x i32> %a, <8 x i16> %b, <16 x i8> %cc) {
; CHECK-LE-LABEL: test_vqdmlal_high_s16_bitcast:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-LE-NEXT: sqdmlal v0.4s, v1.4h, v2.4h
+; CHECK-LE-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_vqdmlal_high_s16_bitcast:
@@ -510,9 +494,7 @@ entry:
define <8 x i16> @test_pmull_high_p8_64(<2 x i64> %aa, <2 x i64> %bb) {
; CHECK-LE-LABEL: test_pmull_high_p8_64:
; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-LE-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-LE-NEXT: pmull2 v0.8h, v0.16b, v1.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_pmull_high_p8_64:
@@ -568,8 +550,7 @@ define <8 x i16> @foov8i16(<16 x i8> %a1, <2 x i64> %b1) {
define <2 x i64> @hadd32_zext_asr(<16 x i8> %src1a) {
; CHECK-LE-LABEL: hadd32_zext_asr:
; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-LE-NEXT: ushll v0.2d, v0.2s, #1
+; CHECK-LE-NEXT: ushll2 v0.2d, v0.4s, #1
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: hadd32_zext_asr:
More information about the llvm-commits
mailing list