[llvm] r242270 - AVX : Fix ISA disabling in case AVX512VL , some instructions should be disabled only if AVX512BW present.
Igor Breger
igor.breger at intel.com
Wed Jul 15 00:08:11 PDT 2015
Author: ibreger
Date: Wed Jul 15 02:08:10 2015
New Revision: 242270
URL: http://llvm.org/viewvc/llvm-project?rev=242270&view=rev
Log:
AVX : Fix ISA disabling in case AVX512VL , some instructions should be disabled only if AVX512BW present.
Tests added.
Differential Revision: http://reviews.llvm.org/D11122
Added:
llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=242270&r1=242269&r2=242270&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed Jul 15 02:08:10 2015
@@ -767,6 +767,7 @@ def HasDQI : Predicate<"Subtarget-
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">,
AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">;
+def NoBWI : Predicate<"!Subtarget->hasBWI()">;
def HasVLX : Predicate<"Subtarget->hasVLX()">,
AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
def NoVLX : Predicate<"!Subtarget->hasVLX()">;
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=242270&r1=242269&r2=242270&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Jul 15 02:08:10 2015
@@ -2845,8 +2845,8 @@ multiclass PDI_binop_rm<bits<8> opc, str
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
ValueType OpVT128, ValueType OpVT256,
- OpndItins itins, bit IsCommutable = 0> {
-let Predicates = [HasAVX, NoVLX] in
+ OpndItins itins, bit IsCommutable = 0, Predicate prd> {
+let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
@@ -2854,7 +2854,7 @@ let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
memopv2i64, i128mem, itins, IsCommutable, 1>;
-let Predicates = [HasAVX2, NoVLX] in
+let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
@@ -2863,13 +2863,13 @@ let Predicates = [HasAVX2, NoVLX] in
// These are ordered here for pattern ordering requirements with the fp versions
defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 0>;
+ SSE_VEC_BIT_ITINS_P, 0, NoVLX>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@@ -4010,39 +4010,39 @@ multiclass PDI_binop_rm2<bits<8> opc, st
} // ExeDomain = SSEPackedInt
defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
- SSE_INTALUQ_ITINS_P, 1>;
+ SSE_INTALUQ_ITINS_P, 1, NoVLX>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoBWI>;
defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoBWI>;
defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
- SSE_INTALUQ_ITINS_P, 0>;
+ SSE_INTALUQ_ITINS_P, 0, NoVLX>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoBWI>;
// Intrinsic forms
defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
@@ -4239,17 +4239,17 @@ let ExeDomain = SSEPackedInt, SchedRW =
//===---------------------------------------------------------------------===//
defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoBWI>;
defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX>;
defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoBWI>;
defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
Added: llvm/trunk/test/CodeGen/X86/avx-isa-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-isa-check.ll?rev=242270&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-isa-check.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx-isa-check.ll Wed Jul 15 02:08:10 2015
@@ -0,0 +1,215 @@
+; check AVX2 instructions that are disabled in case avx512VL/avx512BW present
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
+
+define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = and <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = and <2 x i64> %a2, %b
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %x = and <4 x i64> %a, %y
+ ret <4 x i64> %x
+}
+
+define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
+ %x = and <2 x i64> %a, %y
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = or <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = xor <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = or <2 x i64> %a2, %b
+ ret <2 x i64> %x
+}
+
+define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = xor <2 x i64> %a2, %b
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %x = add <4 x i64> %i, %j
+ ret <4 x i64> %x
+}
+
+define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %x = add <8 x i32> %i, %j
+ ret <8 x i32> %x
+}
+
+define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = add <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %x = add <32 x i8> %i, %j
+ ret <32 x i8> %x
+}
+
+define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %x = sub <4 x i64> %i, %j
+ ret <4 x i64> %x
+}
+
+define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %x = sub <8 x i32> %i, %j
+ ret <8 x i32> %x
+}
+
+define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = sub <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %x = sub <32 x i8> %i, %j
+ ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = mul <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %bincmp = icmp slt <8 x i32> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i32>
+ ret <8 x i32> %x
+}
+
+define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %bincmp = icmp eq <32 x i8> %i, %j
+ %x = sext <32 x i1> %bincmp to <32 x i8>
+ ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %bincmp = icmp eq <16 x i16> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i16>
+ ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %bincmp = icmp slt <32 x i8> %i, %j
+ %x = sext <32 x i1> %bincmp to <32 x i8>
+ ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %bincmp = icmp slt <16 x i16> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i16>
+ ret <16 x i16> %x
+}
+
+define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %bincmp = icmp eq <8 x i32> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i32>
+ ret <8 x i32> %x
+}
+
+define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
+ %x = add <2 x i64> %i, %j
+ ret <2 x i64> %x
+}
+
+define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
+ %x = add <4 x i32> %i, %j
+ ret <4 x i32> %x
+}
+
+define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %x = add <8 x i16> %i, %j
+ ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %x = add <16 x i8> %i, %j
+ ret <16 x i8> %x
+}
+
+define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
+ %x = sub <2 x i64> %i, %j
+ ret <2 x i64> %x
+}
+
+define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
+ %x = sub <4 x i32> %i, %j
+ ret <4 x i32> %x
+}
+
+define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %x = sub <8 x i16> %i, %j
+ ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %x = sub <16 x i8> %i, %j
+ ret <16 x i8> %x
+}
+
+define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %x = mul <8 x i16> %i, %j
+ ret <8 x i16> %x
+}
+
+define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %bincmp = icmp slt <8 x i16> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %bincmp = icmp slt <16 x i8> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i8>
+ ret <16 x i8> %x
+}
+
+define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %bincmp = icmp eq <8 x i16> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %bincmp = icmp eq <16 x i8> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i8>
+ ret <16 x i8> %x
+}
More information about the llvm-commits
mailing list