[llvm] b310dd1 - [AArch64][SVE] Lower index_vector to step_vector
Jun Ma via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 30 04:14:07 PDT 2021
Author: Jun Ma
Date: 2021-04-30T19:04:39+08:00
New Revision: b310dd15017f9aecd1ecc84b896d346075282a34
URL: https://github.com/llvm/llvm-project/commit/b310dd15017f9aecd1ecc84b896d346075282a34
DIFF: https://github.com/llvm/llvm-project/commit/b310dd15017f9aecd1ecc84b896d346075282a34.diff
LOG: [AArch64][SVE] Lower index_vector to step_vector
As discussed in D100107, this patch first converts index_vector to
step_vector, and then converts step_vector back to index_vector after LegalizeDAG.
Differential Revision: https://reviews.llvm.org/D100816
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2ec9389d2894e..34c3c8c66b9da 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -902,6 +902,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::VECREDUCE_ADD);
+ setTargetDAGCombine(ISD::STEP_VECTOR);
setTargetDAGCombine(ISD::GlobalAddress);
@@ -1151,7 +1152,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::STEP_VECTOR, VT, Custom);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
@@ -4476,8 +4476,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
return LowerSPLAT_VECTOR(Op, DAG);
- case ISD::STEP_VECTOR:
- return LowerSTEP_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:
@@ -9162,20 +9160,6 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return GenerateTBL(Op, ShuffleMask, DAG);
}
-SDValue AArch64TargetLowering::LowerSTEP_VECTOR(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
- assert(VT.isScalableVector() &&
- "Only expect scalable vectors for STEP_VECTOR");
- assert(VT.getScalarType() != MVT::i1 &&
- "Vectors of i1 types not supported for STEP_VECTOR");
-
- SDValue StepVal = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, dl, StepVal.getValueType());
- return DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, VT, Zero, StepVal);
-}
-
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9261,9 +9245,7 @@ SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
// create the vector 0,1,0,1,...
- SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
- SDValue SV = DAG.getNode(AArch64ISD::INDEX_VECTOR,
- DL, MVT::nxv2i64, Zero, One);
+ SDValue SV = DAG.getNode(ISD::STEP_VECTOR, DL, MVT::nxv2i64, One);
SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
// create the vector idx64,idx64+1,idx64,idx64+1,...
@@ -13665,15 +13647,18 @@ static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
- EVT ScalarTy = Op1.getValueType();
-
- if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) {
- Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
- Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
- }
+ EVT ScalarTy = Op2.getValueType();
+ if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
+ ScalarTy = MVT::i32;
- return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, N->getValueType(0),
- Op1, Op2);
+ // Lower index_vector(base, step) to mul(step_vector(1), splat(step)) + splat(base).
+ SDValue One = DAG.getConstant(1, DL, ScalarTy);
+ SDValue StepVector =
+ DAG.getNode(ISD::STEP_VECTOR, DL, N->getValueType(0), One);
+ SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
+ SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
}
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
@@ -15463,6 +15448,19 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
+static SDValue performStepVectorCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (!DCI.isAfterLegalizeDAG())
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue StepVal = N->getOperand(0);
+ SDValue Zero = DAG.getConstant(0, DL, StepVal.getValueType());
+ return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, VT, Zero, StepVal);
+}
+
// Turns the vector of indices into a vector of byte offsets by scaling Offset
// by (BitWidth / 8).
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
@@ -15977,6 +15975,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performExtractVectorEltCombine(N, DAG);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
+ case ISD::STEP_VECTOR:
+ return performStepVectorCombine(N, DCI, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index afd8435cc6930..fde7740a2d972 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -938,7 +938,6 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
bool OverrideNEON = false) const;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 549fbe86fedb1..4af8e65b8b35a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1370,8 +1370,8 @@ let Predicates = [HasSVE] in {
defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
- defm INDEX_RR : sve_int_index_rr<"index", index_vector, index_vector_oneuse>;
- defm INDEX_IR : sve_int_index_ir<"index", index_vector, index_vector_oneuse>;
+ defm INDEX_RR : sve_int_index_rr<"index", index_vector, index_vector_oneuse, AArch64mul_p_oneuse>;
+ defm INDEX_IR : sve_int_index_ir<"index", index_vector, index_vector_oneuse, AArch64mul_p, AArch64mul_p_oneuse>;
defm INDEX_RI : sve_int_index_ri<"index", index_vector, index_vector_oneuse>;
defm INDEX_II : sve_int_index_ii<"index", index_vector, index_vector_oneuse>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 4b6a9819338c3..b88a6b93276e9 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4837,7 +4837,7 @@ class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ir<string asm, SDPatternOperator op, SDPatternOperator oneuseop> {
+multiclass sve_int_index_ir<string asm, SDPatternOperator op, SDPatternOperator oneuseop, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>;
def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>;
def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
@@ -4862,6 +4862,25 @@ multiclass sve_int_index_ir<string asm, SDPatternOperator op, SDPatternOperator
def : Pat<(add (nxv2i64 (oneuseop (i64 0), GPR64:$Rm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
(!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
+ // mul(index_vector(0, 1), dup(Y)) -> index_vector(0, Y).
+ def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 0), (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>;
+ def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 0), (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>;
+ def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 0), (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>;
+ def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 0), (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
+ (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>;
+
+ // add(mul(index_vector(0, 1), dup(Y)), dup(X)) -> index_vector(X, Y).
+ def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 0), (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+ (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
+ def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 0), (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+ (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
+ def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 0), (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+ (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
+ def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 0), (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+ (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
}
class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -4924,7 +4943,7 @@ class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_rr<string asm, SDPatternOperator op, SDPatternOperator oneuseop> {
+multiclass sve_int_index_rr<string asm, SDPatternOperator op, SDPatternOperator oneuseop, SDPatternOperator mulop> {
def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>;
def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>;
def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
@@ -4944,6 +4963,16 @@ multiclass sve_int_index_rr<string asm, SDPatternOperator op, SDPatternOperator
(!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(add (nxv2i64 (oneuseop (i64 0), GPR64:$Rm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
(!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
+
+ // add(mul(index_vector(0, 1), dup(Y)), dup(X)) -> index_vector(X, Y).
+ def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 0), (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
+ (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
+ def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 0), (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
+ (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
+ def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 0), (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
+ (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
+ def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 0), (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+ (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
index c678cdfebecf8..2bfdb1dd1137e 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
;
@@ -6,132 +7,175 @@
define <vscale x 16 x i8> @index_ii_i8() {
; CHECK-LABEL: index_ii_i8:
-; CHECK: index z0.b, #-16, #15
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.b, #-16, #15
+; CHECK-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 -16, i8 15)
ret <vscale x 16 x i8> %out
}
define <vscale x 8 x i16> @index_ii_i16() {
; CHECK-LABEL: index_ii_i16:
-; CHECK: index z0.h, #15, #-16
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, #15, #-16
+; CHECK-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 15, i16 -16)
ret <vscale x 8 x i16> %out
}
define <vscale x 4 x i32> @index_ii_i32() {
; CHECK-LABEL: index_ii_i32:
-; CHECK: index z0.s, #-16, #15
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #-16, #15
+; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -16, i32 15)
ret <vscale x 4 x i32> %out
}
define <vscale x 2 x i64> @index_ii_i64() {
; CHECK-LABEL: index_ii_i64:
-; CHECK: index z0.d, #15, #-16
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #15, #-16
+; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 15, i64 -16)
ret <vscale x 2 x i64> %out
}
define <vscale x 2 x i64> @index_ii_range() {
; CHECK-LABEL: index_ii_range:
-; CHECK: mov w8, #16
-; CHECK-NEXT: mov x9, #-17
-; CHECK-NEXT: index z0.d, x9, x8
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #16
+; CHECK-NEXT: mov x9, #-17
+; CHECK-NEXT: index z0.d, x9, x8
+; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16)
ret <vscale x 2 x i64> %out
}
+define <vscale x 8 x i16> @index_ii_range_combine(i16 %a) {
+; CHECK-LABEL: index_ii_range_combine:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, #2, #8
+; CHECK-NEXT: ret
+ %val = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+ %val1 = shufflevector <vscale x 8 x i16> %val, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %val2 = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 0, i16 2)
+ %val3 = shl <vscale x 8 x i16> %val2, %val1
+ %out = add <vscale x 8 x i16> %val3, %val1
+ ret <vscale x 8 x i16> %out
+}
+
;
; INDEX (IMMEDIATE, SCALAR)
;
define <vscale x 16 x i8> @index_ir_i8(i8 %a) {
; CHECK-LABEL: index_ir_i8:
-; CHECK: index z0.b, #15, w0
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.b, #15, w0
+; CHECK-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 15, i8 %a)
ret <vscale x 16 x i8> %out
}
define <vscale x 8 x i16> @index_ir_i16(i16 %a) {
; CHECK-LABEL: index_ir_i16:
-; CHECK: index z0.h, #-16, w0
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, #-16, w0
+; CHECK-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 -16, i16 %a)
ret <vscale x 8 x i16> %out
}
define <vscale x 4 x i32> @index_ir_i32(i32 %a) {
; CHECK-LABEL: index_ir_i32:
-; CHECK: index z0.s, #15, w0
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #15, w0
+; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 15, i32 %a)
ret <vscale x 4 x i32> %out
}
define <vscale x 2 x i64> @index_ir_i64(i64 %a) {
; CHECK-LABEL: index_ir_i64:
-; CHECK: index z0.d, #-16, x0
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #-16, x0
+; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 -16, i64 %a)
ret <vscale x 2 x i64> %out
}
define <vscale x 4 x i32> @index_ir_range(i32 %a) {
; CHECK-LABEL: index_ir_range:
-; CHECK: mov w8, #-17
-; CHECK: index z0.s, w8, w0
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-17
+; CHECK-NEXT: index z0.s, w8, w0
+; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 -17, i32 %a)
ret <vscale x 4 x i32> %out
}
+define <vscale x 4 x i32> @index_ir_range_combine(i32 %a) {
+; CHECK-LABEL: index_ir_range_combine:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #0, w0
+; CHECK-NEXT: ret
+ %val = insertelement <vscale x 4 x i32> poison, i32 2, i32 0
+ %val1 = shufflevector <vscale x 4 x i32> %val, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %tmp = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 2, i32 1)
+ %tmp1 = sub <vscale x 4 x i32> %tmp, %val1
+ %val2 = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
+ %val3 = shufflevector <vscale x 4 x i32> %val2, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %out = mul <vscale x 4 x i32> %tmp1, %val3
+ ret <vscale x 4 x i32> %out
+}
+
;
; INDEX (SCALAR, IMMEDIATE)
;
define <vscale x 16 x i8> @index_ri_i8(i8 %a) {
; CHECK-LABEL: index_ri_i8:
-; CHECK: index z0.b, w0, #-16
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.b, w0, #-16
+; CHECK-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 -16)
ret <vscale x 16 x i8> %out
}
define <vscale x 8 x i16> @index_ri_i16(i16 %a) {
; CHECK-LABEL: index_ri_i16:
-; CHECK: index z0.h, w0, #15
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, w0, #15
+; CHECK-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 15)
ret <vscale x 8 x i16> %out
}
define <vscale x 4 x i32> @index_ri_i32(i32 %a) {
; CHECK-LABEL: index_ri_i32:
-; CHECK: index z0.s, w0, #-16
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, w0, #-16
+; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 -16)
ret <vscale x 4 x i32> %out
}
define <vscale x 2 x i64> @index_ri_i64(i64 %a) {
; CHECK-LABEL: index_ri_i64:
-; CHECK: index z0.d, x0, #15
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, x0, #15
+; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 15)
ret <vscale x 2 x i64> %out
}
define <vscale x 8 x i16> @index_ri_range(i16 %a) {
; CHECK-LABEL: index_ri_range:
-; CHECK: mov w8, #16
-; CHECK: index z0.h, w0, w8
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #16
+; CHECK-NEXT: index z0.h, w0, w8
+; CHECK-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 16)
ret <vscale x 8 x i16> %out
}
@@ -142,36 +186,76 @@ define <vscale x 8 x i16> @index_ri_range(i16 %a) {
define <vscale x 16 x i8> @index_rr_i8(i8 %a, i8 %b) {
; CHECK-LABEL: index_rr_i8:
-; CHECK: index z0.b, w0, w1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.b, w0, w1
+; CHECK-NEXT: ret
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 %a, i8 %b)
ret <vscale x 16 x i8> %out
}
define <vscale x 8 x i16> @index_rr_i16(i16 %a, i16 %b) {
; CHECK-LABEL: index_rr_i16:
-; CHECK: index z0.h, w0, w1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, w0, w1
+; CHECK-NEXT: ret
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 %b)
ret <vscale x 8 x i16> %out
}
define <vscale x 4 x i32> @index_rr_i32(i32 %a, i32 %b) {
; CHECK-LABEL: index_rr_i32:
-; CHECK: index z0.s, w0, w1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, w0, w1
+; CHECK-NEXT: ret
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 %a, i32 %b)
ret <vscale x 4 x i32> %out
}
define <vscale x 2 x i64> @index_rr_i64(i64 %a, i64 %b) {
; CHECK-LABEL: index_rr_i64:
-; CHECK: index z0.d, x0, x1
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, x0, x1
+; CHECK-NEXT: ret
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %a, i64 %b)
ret <vscale x 2 x i64> %out
}
+define <vscale x 4 x i32> @index_rr_i32_combine(i32 %a, i32 %b) {
+; CHECK-LABEL: index_rr_i32_combine:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, w0, w1
+; CHECK-NEXT: ret
+ %val = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
+ %val1 = shufflevector <vscale x 4 x i32> %val, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %val2 = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %val3 = shufflevector <vscale x 4 x i32> %val2, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %tmp = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 0, i32 1)
+ %tmp1 = mul <vscale x 4 x i32> %tmp, %val3
+ %out = add <vscale x 4 x i32> %tmp1, %val1
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @index_rr_i32_not_combine(i32 %a, i32 %b) {
+; CHECK-LABEL: index_rr_i32_not_combine:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, w0
+; CHECK-NEXT: mov z1.s, w1
+; CHECK-NEXT: index z2.s, #0, #1
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mla z0.s, p0/m, z2.s, z1.s
+; CHECK-NEXT: add z0.s, z0.s, z2.s
+; CHECK-NEXT: ret
+ %val = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
+ %val1 = shufflevector <vscale x 4 x i32> %val, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %val2 = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %val3 = shufflevector <vscale x 4 x i32> %val2, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %tmp = call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 0, i32 1)
+ %tmp1 = mul <vscale x 4 x i32> %tmp, %val3
+ %tmp2 = add <vscale x 4 x i32> %tmp1, %val1
+ %out = add <vscale x 4 x i32> %tmp2, %tmp
+ ret <vscale x 4 x i32> %out
+}
+
declare <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32, i32)
More information about the llvm-commits
mailing list