[llvm-branch-commits] [llvm] d1b606f - [VE] Extract & insert vector element isel
Simon Moll via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 8 02:52:17 PST 2021
Author: Simon Moll
Date: 2021-01-08T11:46:59+01:00
New Revision: d1b606f897bee5f16ce9c459d3a79c8119018d9c
URL: https://github.com/llvm/llvm-project/commit/d1b606f897bee5f16ce9c459d3a79c8119018d9c
DIFF: https://github.com/llvm/llvm-project/commit/d1b606f897bee5f16ce9c459d3a79c8119018d9c.diff
LOG: [VE] Extract & insert vector element isel
Isel and tests for extract_vector_elt and insert_vector_elt.
Reviewed By: kaz7
Differential Revision: https://reviews.llvm.org/D93687
Added:
llvm/test/CodeGen/VE/Vector/extract_elt.ll
llvm/test/CodeGen/VE/Vector/insert_elt.ll
Modified:
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrInfo.td
llvm/lib/Target/VE/VEInstrPatternsVec.td
Removed:
################################################################################
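Before the diff, a minimal scalar sketch of the packed extract that lowerEXTRACT_VECTOR_ELT below emits for v512i32/v512f32: two 32-bit elements share each 64-bit vector lane, with the even-indexed element in the upper half. A plain uint64_t array stands in for the V64 register here, and the helper name is illustrative, not VE or LLVM API:

    #include <cstdint>

    // Read the 64-bit lane (the LVS instruction), shift the wanted half
    // down (even index -> shift by 32, odd -> 0), and keep the low 32 bits.
    static uint32_t extractPacked(const uint64_t *Lanes, uint64_t Idx) {
      uint64_t Lane = Lanes[Idx >> 1];
      uint64_t Shift = ((Idx & 1) ^ 1) << 5; // ((idx & 1) ^ 1) * 32
      return static_cast<uint32_t>(Lane >> Shift);
    }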
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 230ce42d46b3..a0d00ebca010 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -74,6 +74,8 @@ bool VETargetLowering::CanLowerReturn(
static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
MVT::v256f32, MVT::v512f32, MVT::v256f64};
+static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
+
void VETargetLowering::initRegisterClasses() {
// Set up the register classes.
addRegisterClass(MVT::i32, &VE::I32RegClass);
@@ -292,6 +294,8 @@ void VETargetLowering::initSPUActions() {
void VETargetLowering::initVPUActions() {
for (MVT LegalVecVT : AllVectorVTs) {
setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
// Translate all vector instructions with legal element types to VVP_*
// nodes.
// TODO We will custom-widen into VVP_* nodes in the future. While we are
@@ -301,6 +305,11 @@ void VETargetLowering::initVPUActions() {
setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
#include "VVPNodes.def"
}
+
+ for (MVT LegalPackedVT : AllPackedVTs) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
+ }
}
SDValue
@@ -1662,6 +1671,11 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VAARG:
return lowerVAARG(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+
#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
return lowerToVVP(Op, DAG);
@@ -2661,3 +2675,100 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
}
llvm_unreachable("lowerToVVP called for unexpected SDNode.");
}
+
+SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+
+ // Special treatment for packed V64 types.
+ assert(VT == MVT::v512i32 || VT == MVT::v512f32);
+ // In pseudo code:
+ // %packed_v = extractelt %vr, %idx / 2
+ // %v = %packed_v >> (((%idx % 2) ^ 1) * 32)
+ // %res = %v & 0xffffffff
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ SDLoc DL(Op);
+ SDValue Result = Op;
+ if (0 /* Idx->isConstant() */) {
+ // TODO: optimized implementation using constant values
+ } else {
+ SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
+ SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
+ SDValue PackedElt =
+ SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
+ SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
+ SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
+ SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
+ Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
+ PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
+ SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
+ PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
+ SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
+ Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::i32, PackedElt, SubI32),
+ 0);
+
+ if (Op.getSimpleValueType() == MVT::f32) {
+ Result = DAG.getBitcast(MVT::f32, Result);
+ } else {
+ assert(Op.getSimpleValueType() == MVT::i32);
+ }
+ }
+ return Result;
+}
+
+SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
+ MVT VT = Op.getOperand(0).getSimpleValueType();
+
+ // Special treatment for packed V64 types.
+ assert(VT == MVT::v512i32 || VT == MVT::v512f32);
+ // Packed v512i32 and v512f32 elements are stored starting from the upper
+ // 32 bits (bits 0..31) of each 64-bit lane, so writing these "upper bits"
+ // corresponds to `val << 32` from a C implementation's point of view.
+ //
+ // In pseudo code:
+ // %packed_elt = extractelt %vr, (%idx >> 1)
+ // %shift = ((%idx & 1) ^ 1) << 5
+ // %packed_elt &= 0xffffffff00000000 >> shift
+ // %packed_elt |= (zext %val) << shift
+ // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
+
+ SDLoc DL(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ if (Idx.getSimpleValueType() == MVT::i32)
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
+ if (Val.getSimpleValueType() == MVT::f32)
+ Val = DAG.getBitcast(MVT::i32, Val);
+ assert(Val.getSimpleValueType() == MVT::i32);
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+
+ SDValue Result = Op;
+ if (0 /* Idx->isConstant()*/) {
+ // TODO: optimized implementation using constant values
+ } else {
+ SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
+ SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
+ SDValue PackedElt =
+ SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
+ SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
+ SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
+ SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
+ Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
+ SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
+ Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
+ PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
+ Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
+ PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
+ Result =
+ SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
+ {HalfIdx, PackedElt, Vec}),
+ 0);
+ }
+ return Result;
+}
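The insert path mirrors the extract sketch shown before the diff; a scalar model of the mask/shift/or sequence built by lowerINSERT_VECTOR_ELT above, under the same illustrative conventions (a uint64_t array for the V64 register, hypothetical helper name):

    #include <cstdint>

    // Clear the target half of the 64-bit lane, OR in the zero-extended
    // 32-bit value, and write the lane back (the LSV instruction).
    static void insertPacked(uint64_t *Lanes, uint64_t Idx, uint32_t Val) {
      uint64_t Shift = ((Idx & 1) ^ 1) << 5;          // even -> 32, odd -> 0
      uint64_t Keep = 0xFFFFFFFF00000000ULL >> Shift; // mask of the other half
      Lanes[Idx >> 1] =
          (Lanes[Idx >> 1] & Keep) | (static_cast<uint64_t>(Val) << Shift);
    }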
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index f2055da0745e..a6e1bf396035 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -130,6 +130,8 @@ class VETargetLowering : public TargetLowering {
SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
/// } Custom Lower
/// Replace the results of node with an illegal result
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 0e41473733c0..b6862cf7b30d 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1569,6 +1569,17 @@ defm SHMB : SHMm<"shm.b", 0x31, I64>;
// Pattern Matchings
//===----------------------------------------------------------------------===//
+// Basic casts between registers. These are often used in ISel patterns, so
+// define them as OutPatFrags.
+def i2l : OutPatFrag<(ops node:$exp),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_i32)>;
+def l2i : OutPatFrag<(ops node:$exp),
+ (EXTRACT_SUBREG $exp, sub_i32)>;
+def f2l : OutPatFrag<(ops node:$exp),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_f32)>;
+def l2f : OutPatFrag<(ops node:$exp),
+ (EXTRACT_SUBREG $exp, sub_f32)>;
+
// Small immediates.
def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>;
def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>;
@@ -1782,9 +1793,6 @@ defm : ATMLDm<atomic_load_16, LD2BZXrri, LD2BZXrii, LD2BZXzri, LD2BZXzii>;
defm : ATMLDm<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
defm : ATMLDm<atomic_load_64, LDrri, LDrii, LDzri, LDzii>;
-def i2l : OutPatFrag<(ops node:$exp),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $exp, sub_i32)>;
-
// Optimized atomic loads with sext
multiclass SXATMLDm<SDPatternOperator from, Operand TY,
SDPatternOperator torri, SDPatternOperator torii,
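For reference, the i2l/l2i and f2l/l2f fragments defined above are pure sub-register moves (INSERT_SUBREG/EXTRACT_SUBREG), not machine instructions. A rough scalar model follows; after i2l the upper half is really undefined (IMPLICIT_DEF) and is zeroed here only to keep the model deterministic. Note that VE keeps f32 values in the upper 32 bits of the 64-bit register (hence sub_f32 and the shifts visible in the v512f32 tests below):

    #include <cstdint>
    #include <cstring>

    // sub_i32: the low 32 bits of the 64-bit super-register.
    uint64_t i2l(uint32_t V) { return static_cast<uint64_t>(V); }
    uint32_t l2i(uint64_t R) { return static_cast<uint32_t>(R); }

    // sub_f32: on VE, f32 occupies the *upper* 32 bits.
    uint64_t f2l(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      return static_cast<uint64_t>(Bits) << 32;
    }
    float l2f(uint64_t R) {
      uint32_t Bits = static_cast<uint32_t>(R >> 32);
      float F;
      std::memcpy(&F, &Bits, sizeof(F));
      return F;
    }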
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index c08e7ba7bbe7..0084876f9f1b 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -15,22 +15,19 @@
// Instruction format superclass
//===----------------------------------------------------------------------===//
-multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, SDNodeXForm ImmCast, int SubRegIdx> {
+multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
+ SDNodeXForm ImmCast, SDNodeXForm SuperRegCast> {
// VBRDil
def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),
(VBRDil (ImmCast $sy), i32:$vl)>;
// VBRDrl
def : Pat<(v32 (vec_broadcast s32:$sy, i32:$vl)),
- (VBRDrl
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, SubRegIdx),
- i32:$vl)>;
+ (VBRDrl (SuperRegCast $sy), i32:$vl)>;
}
-defm : vbrd_elem32<v256f32, f32, simm7fp, LO7FP, sub_f32>;
-defm : vbrd_elem32<v256i32, i32, simm7, LO7, sub_i32>;
-
-multiclass vbrd_elem64<ValueType v64, ValueType s64, SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+multiclass vbrd_elem64<ValueType v64, ValueType s64,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
// VBRDil
def : Pat<(v64 (vec_broadcast (s64 ImmOp:$sy), i32:$vl)),
(VBRDil (ImmCast $sy), i32:$vl)>;
@@ -40,5 +37,55 @@ multiclass vbrd_elem64<ValueType v64, ValueType s64, SDPatternOperator ImmOp, SD
(VBRDrl s64:$sy, i32:$vl)>;
}
-defm : vbrd_elem64<v256f64, f64, simm7fp, LO7FP>;
-defm : vbrd_elem64<v256i64, i64, simm7, LO7>;
+multiclass extract_insert_elem32<ValueType v32, ValueType s32,
+ SDNodeXForm SubRegCast,
+ SDNodeXForm SuperRegCast> {
+ // LVSvi
+ def: Pat<(s32 (extractelt v32:$vec, uimm7:$idx)),
+ (SubRegCast (LVSvi v32:$vec, (ULO7 $idx)))>;
+ // LVSvr
+ def: Pat<(s32 (extractelt v32:$vec, i64:$idx)),
+ (SubRegCast (LVSvr v32:$vec, $idx))>;
+
+ // LSVir
+ def: Pat<(v32 (insertelt v32:$vec, s32:$val, uimm7:$idx)),
+ (LSVir_v (ULO7 $idx), (SuperRegCast $val), $vec)>;
+ // LSVrr
+ def: Pat<(v32 (insertelt v32:$vec, s32:$val, i64:$idx)),
+ (LSVrr_v $idx, (SuperRegCast $val), $vec)>;
+}
+
+multiclass extract_insert_elem64<ValueType v64, ValueType s64> {
+ // LVSvi
+ def: Pat<(s64 (extractelt v64:$vec, uimm7:$idx)),
+ (LVSvi v64:$vec, (ULO7 $idx))>;
+ // LVSvr
+ def: Pat<(s64 (extractelt v64:$vec, i64:$idx)),
+ (LVSvr v64:$vec, $idx)>;
+
+ // LSVir
+ def: Pat<(v64 (insertelt v64:$vec, s64:$val, uimm7:$idx)),
+ (LSVir_v (ULO7 $idx), $val, $vec)>;
+ // LSVrr
+ def: Pat<(v64 (insertelt v64:$vec, s64:$val, i64:$idx)),
+ (LSVrr_v $idx, $val, $vec)>;
+}
+
+multiclass patterns_elem32<ValueType v32, ValueType s32,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast,
+ SDNodeXForm SubRegCast, SDNodeXForm SuperRegCast> {
+ defm : vbrd_elem32<v32, s32, ImmOp, ImmCast, SuperRegCast>;
+ defm : extract_insert_elem32<v32, s32, SubRegCast, SuperRegCast>;
+}
+
+multiclass patterns_elem64<ValueType v64, ValueType s64,
+ SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ defm : vbrd_elem64<v64, s64, ImmOp, ImmCast>;
+ defm : extract_insert_elem64<v64, s64>;
+}
+
+defm : patterns_elem32<v256i32, i32, simm7, LO7, l2i, i2l>;
+defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>;
+
+defm : patterns_elem64<v256i64, i64, simm7, LO7>;
+defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>;
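One consequence of the uimm7 vs. i64 alternatives above shows up in the tests that follow: a constant index of 127 still fits the 7-bit immediate form (LVSvi/LSVir), while 128 is materialized with lea and selects the register form (LVSvr/LSVrr). A one-line sketch of the constraint (hypothetical helper name):

    #include <cstdint>

    // uimm7: the LVS/LSV element-index immediate holds 7 unsigned bits,
    // i.e. indices 0..127; larger indices need a scalar register.
    bool fitsUImm7(uint64_t Idx) { return Idx <= 127; }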
diff --git a/llvm/test/CodeGen/VE/Vector/extract_elt.ll b/llvm/test/CodeGen/VE/Vector/extract_elt.ll
new file mode 100644
index 000000000000..c9c2228f74b0
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/extract_elt.ll
@@ -0,0 +1,192 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+
+;;; <256 x i64>
+
+define fastcc i64 @extract_rr_v256i64(i32 signext %idx, <256 x i64> %v) {
+; CHECK-LABEL: extract_rr_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x i64> %v, i32 %idx
+ ret i64 %ret
+}
+
+define fastcc i64 @extract_ri7_v256i64(<256 x i64> %v) {
+; CHECK-LABEL: extract_ri7_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v0(127)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x i64> %v, i32 127
+ ret i64 %ret
+}
+
+define fastcc i64 @extract_ri8_v256i64(<256 x i64> %v) {
+; CHECK-LABEL: extract_ri8_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x i64> %v, i32 128
+ ret i64 %ret
+}
+
+define fastcc i64 @extract_ri_v512i64(<512 x i64> %v) {
+; CHECK-LABEL: extract_ri_v512i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v1(116)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <512 x i64> %v, i32 372
+ ret i64 %ret
+}
+
+;;; <256 x i32>
+
+define fastcc i32 @extract_rr_v256i32(i32 signext %idx, <256 x i32> %v) {
+; CHECK-LABEL: extract_rr_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x i32> %v, i32 %idx
+ ret i32 %ret
+}
+
+define fastcc i32 @extract_ri7_v256i32(<256 x i32> %v) {
+; CHECK-LABEL: extract_ri7_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v0(127)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x i32> %v, i32 127
+ ret i32 %ret
+}
+
+define fastcc i32 @extract_ri8_v256i32(<256 x i32> %v) {
+; CHECK-LABEL: extract_ri8_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x i32> %v, i32 128
+ ret i32 %ret
+}
+
+define fastcc i32 @extract_ri_v512i32(<512 x i32> %v) {
+; CHECK-LABEL: extract_ri_v512i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 186
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: srl %s0, %s0, 32
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <512 x i32> %v, i32 372
+ ret i32 %ret
+}
+
+define fastcc i32 @extract_rr_v512i32(<512 x i32> %v, i32 signext %idx) {
+; CHECK-LABEL: extract_rr_v512i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srl %s1, %s0, 1
+; CHECK-NEXT: lvs %s1, %v0(%s1)
+; CHECK-NEXT: nnd %s0, %s0, (63)0
+; CHECK-NEXT: sla.w.sx %s0, %s0, 5
+; CHECK-NEXT: srl %s0, %s1, %s0
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <512 x i32> %v, i32 %idx
+ ret i32 %ret
+}
+
+;;; <256 x double>
+
+define fastcc double @extract_rr_v256f64(i32 signext %idx, <256 x double> %v) {
+; CHECK-LABEL: extract_rr_v256f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x double> %v, i32 %idx
+ ret double %ret
+}
+
+define fastcc double @extract_ri7_v256f64(<256 x double> %v) {
+; CHECK-LABEL: extract_ri7_v256f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v0(127)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x double> %v, i32 127
+ ret double %ret
+}
+
+define fastcc double @extract_ri8_v256f64(<256 x double> %v) {
+; CHECK-LABEL: extract_ri8_v256f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x double> %v, i32 128
+ ret double %ret
+}
+
+define fastcc double @extract_ri_v512f64(<512 x double> %v) {
+; CHECK-LABEL: extract_ri_v512f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v1(116)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <512 x double> %v, i32 372
+ ret double %ret
+}
+
+;;; <256 x float>
+
+define fastcc float @extract_rr_v256f32(i32 signext %idx, <256 x float> %v) {
+; CHECK-LABEL: extract_rr_v256f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x float> %v, i32 %idx
+ ret float %ret
+}
+
+define fastcc float @extract_ri7_v256f32(<256 x float> %v) {
+; CHECK-LABEL: extract_ri7_v256f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lvs %s0, %v0(127)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x float> %v, i32 127
+ ret float %ret
+}
+
+define fastcc float @extract_ri8_v256f32(<256 x float> %v) {
+; CHECK-LABEL: extract_ri8_v256f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <256 x float> %v, i32 128
+ ret float %ret
+}
+
+define fastcc float @extract_ri_v512f32(<512 x float> %v) {
+; CHECK-LABEL: extract_ri_v512f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 186
+; CHECK-NEXT: lvs %s0, %v0(%s0)
+; CHECK-NEXT: srl %s0, %s0, 32
+; CHECK-NEXT: sll %s0, %s0, 32
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <512 x float> %v, i32 372
+ ret float %ret
+}
+
+define fastcc float @extract_rr_v512f32(<512 x float> %v, i32 signext %idx) {
+; CHECK-LABEL: extract_rr_v512f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: srl %s1, %s0, 1
+; CHECK-NEXT: lvs %s1, %v0(%s1)
+; CHECK-NEXT: nnd %s0, %s0, (63)0
+; CHECK-NEXT: sla.w.sx %s0, %s0, 5
+; CHECK-NEXT: srl %s0, %s1, %s0
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: sll %s0, %s0, 32
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = extractelement <512 x float> %v, i32 %idx
+ ret float %ret
+}
diff --git a/llvm/test/CodeGen/VE/Vector/insert_elt.ll b/llvm/test/CodeGen/VE/Vector/insert_elt.ll
new file mode 100644
index 000000000000..7ccd45690e9d
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/insert_elt.ll
@@ -0,0 +1,216 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+
+;;; <256 x i64>
+
+define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) {
+; CHECK-LABEL: insert_rr_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lsv %v0(%s0), %s1
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x i64> undef, i64 %s, i32 %idx
+ ret <256 x i64> %ret
+}
+
+define fastcc <256 x i64> @insert_ri7_v256i64(i64 %s) {
+; CHECK-LABEL: insert_ri7_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x i64> undef, i64 %s, i32 127
+ ret <256 x i64> %ret
+}
+
+define fastcc <256 x i64> @insert_ri8_v256i64(i64 %s) {
+; CHECK-LABEL: insert_ri8_v256i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x i64> undef, i64 %s, i32 128
+ ret <256 x i64> %ret
+}
+
+define fastcc <512 x i64> @insert_ri_v512i64(i64 %s) {
+; CHECK-LABEL: insert_ri_v512i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <512 x i64> undef, i64 %s, i32 372
+ ret <512 x i64> %ret
+}
+
+;;; <256 x i32>
+
+define fastcc <256 x i32> @insert_rr_v256i32(i32 signext %idx, i32 signext %s) {
+; CHECK-LABEL: insert_rr_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lsv %v0(%s0), %s1
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x i32> undef, i32 %s, i32 %idx
+ ret <256 x i32> %ret
+}
+
+define fastcc <256 x i32> @insert_ri7_v256i32(i32 signext %s) {
+; CHECK-LABEL: insert_ri7_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x i32> undef, i32 %s, i32 127
+ ret <256 x i32> %ret
+}
+
+define fastcc <256 x i32> @insert_ri8_v256i32(i32 signext %s) {
+; CHECK-LABEL: insert_ri8_v256i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x i32> undef, i32 %s, i32 128
+ ret <256 x i32> %ret
+}
+
+define fastcc <512 x i32> @insert_ri_v512i32(i32 signext %s) {
+; CHECK-LABEL: insert_ri_v512i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 512
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <512 x i32> undef, i32 %s, i32 372
+ ret <512 x i32> %ret
+}
+
+define fastcc <512 x i32> @insert_rr_v512i32(i32 signext %idx, i32 signext %s) {
+; CHECK-LABEL: insert_rr_v512i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: nnd %s2, %s0, (63)0
+; CHECK-NEXT: sla.w.sx %s2, %s2, 5
+; CHECK-NEXT: sll %s1, %s1, %s2
+; CHECK-NEXT: srl %s0, %s0, 1
+; CHECK-NEXT: lvs %s3, %v0(%s0)
+; CHECK-NEXT: srl %s2, (32)1, %s2
+; CHECK-NEXT: and %s2, %s3, %s2
+; CHECK-NEXT: or %s1, %s2, %s1
+; CHECK-NEXT: lsv %v0(%s0), %s1
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <512 x i32> undef, i32 %s, i32 %idx
+ ret <512 x i32> %ret
+}
+
+;;; <256 x double>
+
+define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) {
+; CHECK-LABEL: insert_rr_v256f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lsv %v0(%s0), %s1
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x double> undef, double %s, i32 %idx
+ ret <256 x double> %ret
+}
+
+define fastcc <256 x double> @insert_ri7_v256f64(double %s) {
+; CHECK-LABEL: insert_ri7_v256f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x double> undef, double %s, i32 127
+ ret <256 x double> %ret
+}
+
+define fastcc <256 x double> @insert_ri8_v256f64(double %s) {
+; CHECK-LABEL: insert_ri8_v256f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x double> undef, double %s, i32 128
+ ret <256 x double> %ret
+}
+
+define fastcc <512 x double> @insert_ri_v512f64(double %s) {
+; CHECK-LABEL: insert_ri_v512f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <512 x double> undef, double %s, i32 372
+ ret <512 x double> %ret
+}
+
+;;; <256 x float>
+
+define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) {
+; CHECK-LABEL: insert_rr_v256f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lsv %v0(%s0), %s1
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x float> undef, float %s, i32 %idx
+ ret <256 x float> %ret
+}
+
+define fastcc <256 x float> @insert_ri7_v256f32(float %s) {
+; CHECK-LABEL: insert_ri7_v256f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x float> undef, float %s, i32 127
+ ret <256 x float> %ret
+}
+
+define fastcc <256 x float> @insert_ri8_v256f32(float %s) {
+; CHECK-LABEL: insert_ri8_v256f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 256
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <256 x float> undef, float %s, i32 128
+ ret <256 x float> %ret
+}
+
+define fastcc <512 x float> @insert_ri_v512f32(float %s) {
+; CHECK-LABEL: insert_ri_v512f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s1, 512
+; CHECK-NEXT: lvl %s1
+; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <512 x float> undef, float %s, i32 372
+ ret <512 x float> %ret
+}
+
+define fastcc <512 x float> @insert_rr_v512f32(i32 signext %idx, float %s) {
+; CHECK-LABEL: insert_rr_v512f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sra.l %s1, %s1, 32
+; CHECK-NEXT: srl %s2, %s0, 1
+; CHECK-NEXT: lvs %s3, %v0(%s2)
+; CHECK-NEXT: nnd %s0, %s0, (63)0
+; CHECK-NEXT: sla.w.sx %s0, %s0, 5
+; CHECK-NEXT: srl %s4, (32)1, %s0
+; CHECK-NEXT: and %s3, %s3, %s4
+; CHECK-NEXT: adds.w.zx %s1, %s1, (0)1
+; CHECK-NEXT: sll %s0, %s1, %s0
+; CHECK-NEXT: or %s0, %s3, %s0
+; CHECK-NEXT: lsv %v0(%s2), %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %ret = insertelement <512 x float> undef, float %s, i32 %idx
+ ret <512 x float> %ret
+}