[llvm] r295565 - Revert "[X86] Remove XOP VPCMOV intrinsics and autoupgrade them to native IR."
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 18 12:14:20 PST 2017
Author: ctopper
Date: Sat Feb 18 14:14:20 2017
New Revision: 295565
URL: http://llvm.org/viewvc/llvm-project?rev=295565&view=rev
Log:
Revert "[X86] Remove XOP VPCMOV intrinsics and autoupgrade them to native IR."
This reverts r295564. I missed that clang was still using the intrinsics despite our half implemented autoupgrade support.
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrXOP.td
llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=295565&r1=295564&r2=295565&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sat Feb 18 14:14:20 2017
@@ -3033,6 +3033,17 @@ let TargetPrefix = "x86" in { // All in
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcmov :
+ GCCBuiltin<"__builtin_ia32_vpcmov">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+
def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=295565&r1=295564&r2=295565&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Sat Feb 18 14:14:20 2017
@@ -230,7 +230,7 @@ static bool ShouldUpgradeX86Intrinsic(Fu
Name.startswith("avx2.pblendd.") || // Added in 3.7
Name.startswith("avx.vbroadcastf128") || // Added in 4.0
Name == "avx2.vbroadcasti128" || // Added in 3.7
- Name.startswith("xop.vpcmov") || // Added in 3.8
+ Name == "xop.vpcmov" || // Added in 3.8
Name.startswith("avx512.mask.move.s") || // Added in 4.0
(Name.startswith("xop.vpcom") && // Added in 3.2
F->arg_size() == 2))
@@ -1078,7 +1078,7 @@ void llvm::UpgradeIntrinsicCall(CallInst
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
- } else if (IsX86 && Name.startswith("xop.vpcmov")) {
+ } else if (IsX86 && Name == "xop.vpcmov") {
Value *Sel = CI->getArgOperand(2);
Value *NotSel = Builder.CreateNot(Sel);
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=295565&r1=295564&r2=295565&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Feb 18 14:14:20 2017
@@ -1777,7 +1777,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
// XOP foldable instructions
{ X86::VPCMOVrrr, X86::VPCMOVrmr, 0 },
- { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 },
+ { X86::VPCMOVrrrY, X86::VPCMOVrmrY, 0 },
{ X86::VPCOMBri, X86::VPCOMBmi, 0 },
{ X86::VPCOMDri, X86::VPCOMDmi, 0 },
{ X86::VPCOMQri, X86::VPCOMQmi, 0 },
@@ -2478,7 +2478,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
// XOP foldable instructions
{ X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
- { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 },
+ { X86::VPCMOVrrrY, X86::VPCMOVrrmY, 0 },
{ X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
{ X86::VPERMIL2PDrrY, X86::VPERMIL2PDrmY, 0 },
{ X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=295565&r1=295564&r2=295565&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Sat Feb 18 14:14:20 2017
@@ -290,41 +290,84 @@ let ExeDomain = SSEPackedInt in {
}
// Instruction where either second or third source can be memory
-multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop, ValueType VT> {
+multiclass xop4op_int<bits<8> opc, string OpcodeStr,
+ Intrinsic Int128, Intrinsic Int256> {
// 128-bit Instruction
- def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
+ def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
- (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V;
- def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, x86memop:$src3),
+ [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>,
+ XOP_4V;
+ def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
- (X86andnp (load addr:$src3), RC:$src2))))]>,
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2,
+ (bitconvert (loadv2i64 addr:$src3))))]>,
XOP_4V, VEX_W;
- def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, RC:$src3),
+ def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
- (X86andnp RC:$src3, (load addr:$src2)))))]>,
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
+ VR128:$src3))]>,
XOP_4V;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, RC:$src3),
+ def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W;
+
+ // 256-bit Instruction
+ def rrrY : IXOPi8Reg<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>,
+ XOP_4V, VEX_L;
+ def rrmY : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2,
+ (bitconvert (loadv4i64 addr:$src3))))]>,
+ XOP_4V, VEX_W, VEX_L;
+ def rmrY : IXOPi8Reg<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2)),
+ VR256:$src3))]>,
+ XOP_4V, VEX_L;
+ // For disassembler
+ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def rrrY_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_4V, VEX_W, VEX_L;
}
let ExeDomain = SSEPackedInt in {
- defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64>;
- defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64>, VEX_L;
+ defm VPCMOV : xop4op_int<0xA2, "vpcmov",
+ int_x86_xop_vpcmov, int_x86_xop_vpcmov_256>;
+}
+
+let Predicates = [HasXOP] in {
+ def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+
+ def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
}
multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
Modified: llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll?rev=295565&r1=295564&r2=295565&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-intrinsics-fast-isel.ll Sat Feb 18 14:14:20 2017
@@ -499,22 +499,12 @@ declare <2 x i64> @llvm.x86.xop.vpcmov(<
define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_cmov_si256:
; X32: # BB#0:
-; X32-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; X32-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
-; X32-NEXT: vxorps %ymm3, %ymm2, %ymm3
-; X32-NEXT: vandps %ymm2, %ymm0, %ymm0
-; X32-NEXT: vandps %ymm3, %ymm1, %ymm1
-; X32-NEXT: vorps %ymm1, %ymm0, %ymm0
+; X32-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_cmov_si256:
; X64: # BB#0:
-; X64-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; X64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
-; X64-NEXT: vxorps %ymm3, %ymm2, %ymm3
-; X64-NEXT: vandps %ymm2, %ymm0, %ymm0
-; X64-NEXT: vandps %ymm3, %ymm1, %ymm1
-; X64-NEXT: vorps %ymm1, %ymm0, %ymm0
+; X64-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
ret <4 x i64> %res
Modified: llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll?rev=295565&r1=295564&r2=295565&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64-upgrade.ll Sat Feb 18 14:14:20 2017
@@ -725,42 +725,3 @@ define <8 x i16> @test_int_x86_xop_vpcom
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
-; CHECK-LABEL: test_int_x86_xop_vpcmov:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
- %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
-
-define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
-; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: retq
- %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
- ret <4 x i64> %res
-}
-define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
-; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpcmov %ymm1, (%rdi), %ymm0, %ymm0
-; CHECK-NEXT: retq
- %vec = load <4 x i64>, <4 x i64>* %a1
- %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
- ret <4 x i64> %res
-}
-define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
-; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: retq
- %vec = load <4 x i64>, <4 x i64>* %a2
- %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
-
Modified: llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll?rev=295565&r1=295564&r2=295565&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xop-intrinsics-x86_64.ll Sat Feb 18 14:14:20 2017
@@ -82,23 +82,18 @@ define <2 x i64> @test_int_x86_xop_vpcmo
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
- %1 = xor <2 x i64> %a2, <i64 -1, i64 -1>
- %2 = and <2 x i64> %a0, %a2
- %3 = and <2 x i64> %a1, %1
- %4 = or <2 x i64> %2, %3
- ret <2 x i64> %4
+ %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
- %1 = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
- %2 = and <4 x i64> %a0, %a2
- %3 = and <4 x i64> %a1, %1
- %4 = or <4 x i64> %2, %3
- ret <4 x i64> %4
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
+ ret <4 x i64> %res
}
define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
@@ -106,24 +101,19 @@ define <4 x i64> @test_int_x86_xop_vpcmo
; CHECK-NEXT: vpcmov %ymm1, (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
%vec = load <4 x i64>, <4 x i64>* %a1
- %1 = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
- %2 = and <4 x i64> %a0, %a2
- %3 = and <4 x i64> %vec, %1
- %4 = or <4 x i64> %2, %3
- ret <4 x i64> %4
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
+ ret <4 x i64> %res
}
define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
; CHECK: # BB#0:
; CHECK-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
- %vec = load <4 x i64>, <4 x i64>* %a2
- %1 = xor <4 x i64> %vec, <i64 -1, i64 -1, i64 -1, i64 -1>
- %2 = and <4 x i64> %a0, %vec
- %3 = and <4 x i64> %a1, %1
- %4 = or <4 x i64> %2, %3
- ret <4 x i64> %4
+ %vec = load <4 x i64>, <4 x i64>* %a2
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
+ ret <4 x i64> %res
}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddbd:
More information about the llvm-commits
mailing list