[llvm-commits] [llvm] r159504 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/avx2-vbroadcast.ll
Elena Demikhovsky
elena.demikhovsky at intel.com
Sat Jun 30 23:12:26 PDT 2012
Author: delena
Date: Sun Jul 1 01:12:26 2012
New Revision: 159504
URL: http://llvm.org/viewvc/llvm-project?rev=159504&view=rev
Log:
Optimization of shuffle node that can fit to the register form of VBROADCAST instruction on AVX2.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=159504&r1=159503&r2=159504&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jul 1 01:12:26 2012
@@ -5047,8 +5047,16 @@
SDValue Sc = Op.getOperand(0);
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
- Sc.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
+ Sc.getOpcode() != ISD::BUILD_VECTOR) {
+
+ if (!Subtarget->hasAVX2())
+ return SDValue();
+
+ // Use the register form of the broadcast instruction available on AVX2.
+ if (VT.is256BitVector())
+ Sc = Extract128BitVector(Sc, 0, DAG, dl);
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
+ }
Ld = Sc.getOperand(0);
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=159504&r1=159503&r2=159504&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Jul 1 01:12:26 2012
@@ -7272,8 +7272,8 @@
int_x86_avx2_vbroadcast_ss_ps_256>;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256>;
+def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+ int_x86_avx2_vbroadcast_sd_pd_256>;
let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
@@ -7684,6 +7684,31 @@
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VPBROADCASTQYrm addr:$src)>;
+ def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBrr VR128:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
+ (VPBROADCASTBYrr VR128:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWrr VR128:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
+ (VPBROADCASTWYrr VR128:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDrr VR128:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
+ (VPBROADCASTDYrr VR128:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
+ (VPBROADCASTQYrr VR128:$src)>;
+ def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSrr VR128:$src)>;
+ def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
+ (VBROADCASTSSYrr VR128:$src)>;
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VPBROADCASTQrr VR128:$src)>;
+ def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
+ (VBROADCASTSDYrr VR128:$src)>;
+
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
@@ -7694,7 +7719,7 @@
(VBROADCASTSSYrr
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
- (VBROADCASTSDrr
+ (VBROADCASTSDYrr
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
@@ -7704,7 +7729,7 @@
(VBROADCASTSSYrr
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDrr
+ (VBROADCASTSDYrr
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
}
}
Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=159504&r1=159503&r2=159504&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Sun Jul 1 01:12:26 2012
@@ -259,3 +259,99 @@
ret <4 x double> %wide
}
+;CHECK: _inreg8xfloat
+;CHECK: vbroadcastss
+;CHECK: ret
+define <8 x float> @_inreg8xfloat(<8 x float> %a) {
+ %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
+ ret <8 x float> %b
+}
+
+;CHECK: _inreg4xfloat
+;CHECK: vbroadcastss
+;CHECK: ret
+define <4 x float> @_inreg4xfloat(<4 x float> %a) {
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %b
+}
+
+;CHECK: _inreg16xi16
+;CHECK: vpbroadcastw
+;CHECK: ret
+define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
+ %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
+ ret <16 x i16> %b
+}
+
+;CHECK: _inreg8xi16
+;CHECK: vpbroadcastw
+;CHECK: ret
+define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
+ %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %b
+}
+
+
+;CHECK: _inreg4xi64
+;CHECK: vpbroadcastq
+;CHECK: ret
+define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
+ %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
+ ret <4 x i64> %b
+}
+
+;CHECK: _inreg2xi64
+;CHECK: vpbroadcastq
+;CHECK: ret
+define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
+ %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %b
+}
+
+;CHECK: _inreg4xdouble
+;CHECK: vbroadcastsd
+;CHECK: ret
+define <4 x double> @_inreg4xdouble(<4 x double> %a) {
+ %b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
+ ret <4 x double> %b
+}
+
+;CHECK: _inreg2xdouble
+;CHECK: vpbroadcastq
+;CHECK: ret
+define <2 x double> @_inreg2xdouble(<2 x double> %a) {
+ %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %b
+}
+
+;CHECK: _inreg8xi32
+;CHECK: vpbroadcastd
+;CHECK: ret
+define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
+ %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
+ ret <8 x i32> %b
+}
+
+;CHECK: _inreg4xi32
+;CHECK: vpbroadcastd
+;CHECK: ret
+define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %b
+}
+
+;CHECK: _inreg32xi8
+;CHECK: vpbroadcastb
+;CHECK: ret
+define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
+ %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
+ ret <32 x i8> %b
+}
+
+;CHECK: _inreg16xi8
+;CHECK: vpbroadcastb
+;CHECK: ret
+define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
+ %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %b
+}
More information about the llvm-commits
mailing list