[llvm] r278090 - [X86] Remove the Fv packed logical operation alias instructions. Replace them with patterns to the regular instructions.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 8 20:06:33 PDT 2016
Author: ctopper
Date: Mon Aug 8 22:06:33 2016
New Revision: 278090
URL: http://llvm.org/viewvc/llvm-project?rev=278090&view=rev
Log:
[X86] Remove the Fv packed logical operation alias instructions. Replace them with patterns to the regular instructions.
This enables execution domain fixing for these operations, which is why the tests changed.
Modified:
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll
llvm/trunk/test/CodeGen/X86/fp-logic.ll
llvm/trunk/test/CodeGen/X86/fp128-cast.ll
llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll
llvm/trunk/test/CodeGen/X86/vec_fabs.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=278090&r1=278089&r2=278090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Aug 8 22:06:33 2016
@@ -975,19 +975,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::DIVSSrr_Int, X86::DIVSSrm_Int, 0 },
{ X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
{ X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
-
- // Do not fold Fs* scalar logical op loads because there are no scalar
- // load variants for these instructions. When folded, the load is required
- // to be 128-bits, so the load size would not match.
-
- { X86::FvANDNPDrr, X86::FvANDNPDrm, TB_ALIGN_16 },
- { X86::FvANDNPSrr, X86::FvANDNPSrm, TB_ALIGN_16 },
- { X86::FvANDPDrr, X86::FvANDPDrm, TB_ALIGN_16 },
- { X86::FvANDPSrr, X86::FvANDPSrm, TB_ALIGN_16 },
- { X86::FvORPDrr, X86::FvORPDrm, TB_ALIGN_16 },
- { X86::FvORPSrr, X86::FvORPSrm, TB_ALIGN_16 },
- { X86::FvXORPDrr, X86::FvXORPDrm, TB_ALIGN_16 },
- { X86::FvXORPSrr, X86::FvXORPSrm, TB_ALIGN_16 },
{ X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
{ X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
{ X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
@@ -1295,17 +1282,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, 0 },
{ X86::VDPPDrri, X86::VDPPDrmi, 0 },
{ X86::VDPPSrri, X86::VDPPSrmi, 0 },
- // Do not fold VFs* loads because there are no scalar load variants for
- // these instructions. When folded, the load is required to be 128-bits, so
- // the load size would not match.
- { X86::VFvANDNPDrr, X86::VFvANDNPDrm, 0 },
- { X86::VFvANDNPSrr, X86::VFvANDNPSrm, 0 },
- { X86::VFvANDPDrr, X86::VFvANDPDrm, 0 },
- { X86::VFvANDPSrr, X86::VFvANDPSrm, 0 },
- { X86::VFvORPDrr, X86::VFvORPDrm, 0 },
- { X86::VFvORPSrr, X86::VFvORPSrm, 0 },
- { X86::VFvXORPDrr, X86::VFvXORPDrm, 0 },
- { X86::VFvXORPSrr, X86::VFvXORPSrm, 0 },
{ X86::VHADDPDrr, X86::VHADDPDrm, 0 },
{ X86::VHADDPSrr, X86::VHADDPSrm, 0 },
{ X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=278090&r1=278089&r2=278090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Aug 8 22:06:33 2016
@@ -2845,51 +2845,6 @@ let isCodeGenOnly = 1 in {
SSE_BIT_ITINS_P>;
}
-// Multiclass for vectors using the X86 logical operation aliases for FP.
-multiclass sse12_fp_packed_vector_logical_alias<
- bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
- let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,
- PS, VEX_4V;
-
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- VR128, v2f64, f128mem, loadv2f64, SSEPackedDouble, itins, 0>,
- PD, VEX_4V;
-
- defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- VR256, v8f32, f256mem, loadv8f32, SSEPackedSingle, itins, 0>,
- PS, VEX_4V, VEX_L;
-
- defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- VR256, v4f64, f256mem, loadv4f64, SSEPackedDouble, itins, 0>,
- PD, VEX_4V, VEX_L;
- }
-
- let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle, itins>,
- PS;
-
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble, itins>,
- PD;
- }
-}
-
-let isCodeGenOnly = 1 in {
- defm FvAND : sse12_fp_packed_vector_logical_alias<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
- defm FvOR : sse12_fp_packed_vector_logical_alias<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
- defm FvXOR : sse12_fp_packed_vector_logical_alias<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-
- let isCommutable = 0 in
- defm FvANDN : sse12_fp_packed_vector_logical_alias<0x55, "andn", X86fandn,
- SSE_BIT_ITINS_P>;
-}
-
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
@@ -2971,6 +2926,120 @@ let Predicates = [HasAVX1Only] in {
(VANDNPSYrm VR256:$src1, addr:$src2)>;
}
+let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
+ def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
+ (VANDPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
+ (VORPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
+ (VXORPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
+ (VANDNPSrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(X86fand VR128:$src1, (loadv4f32 addr:$src2)),
+ (VANDPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86for VR128:$src1, (loadv4f32 addr:$src2)),
+ (VORPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86fxor VR128:$src1, (loadv4f32 addr:$src2)),
+ (VXORPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86fandn VR128:$src1, (loadv4f32 addr:$src2)),
+ (VANDNPSrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)),
+ (VANDPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)),
+ (VORPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)),
+ (VXORPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)),
+ (VANDNPDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(X86fand VR128:$src1, (loadv2f64 addr:$src2)),
+ (VANDPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86for VR128:$src1, (loadv2f64 addr:$src2)),
+ (VORPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86fxor VR128:$src1, (loadv2f64 addr:$src2)),
+ (VXORPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86fandn VR128:$src1, (loadv2f64 addr:$src2)),
+ (VANDNPDrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v8f32 (X86fand VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8f32 (X86for VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8f32 (X86fxor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8f32 (X86fandn VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(X86fand VR256:$src1, (loadv8f32 addr:$src2)),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86for VR256:$src1, (loadv8f32 addr:$src2)),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86fxor VR256:$src1, (loadv8f32 addr:$src2)),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86fandn VR256:$src1, (loadv8f32 addr:$src2)),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+
+ def : Pat<(v4f64 (X86fand VR256:$src1, VR256:$src2)),
+ (VANDPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4f64 (X86for VR256:$src1, VR256:$src2)),
+ (VORPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4f64 (X86fxor VR256:$src1, VR256:$src2)),
+ (VXORPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4f64 (X86fandn VR256:$src1, VR256:$src2)),
+ (VANDNPDYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(X86fand VR256:$src1, (loadv4f64 addr:$src2)),
+ (VANDPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86for VR256:$src1, (loadv4f64 addr:$src2)),
+ (VORPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86fxor VR256:$src1, (loadv4f64 addr:$src2)),
+ (VXORPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(X86fandn VR256:$src1, (loadv4f64 addr:$src2)),
+ (VANDNPDYrm VR256:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE1] in {
+ def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
+ (ORPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPSrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
+ (ANDNPSrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
+ (ANDPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
+ (ORPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
+ (XORPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
+ (ANDNPSrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE2] in {
+ def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)),
+ (ORPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)),
+ (ANDNPDrr VR128:$src1, VR128:$src2)>;
+
+ def : Pat<(X86fand VR128:$src1, (memopv2f64 addr:$src2)),
+ (ANDPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86for VR128:$src1, (memopv2f64 addr:$src2)),
+ (ORPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86fxor VR128:$src1, (memopv2f64 addr:$src2)),
+ (XORPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86fandn VR128:$src1, (memopv2f64 addr:$src2)),
+ (ANDNPDrm VR128:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll?rev=278090&r1=278089&r2=278090&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll (original)
+++ llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll Mon Aug 8 22:06:33 2016
@@ -17,28 +17,28 @@ define void @test_copysign_const_magnitu
; CHECK: id
%iX = call double @id_d(double %X)
-; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
%d0 = call double @copysign(double 0.000000e+00, double %iX)
; CHECK-NEXT: id
%id0 = call double @id_d(double %d0)
-; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
-; CHECK-NEXT: orpd [[ZERO]](%rip), %xmm0
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ZERO]](%rip), %xmm0
%dn0 = call double @copysign(double -0.000000e+00, double %id0)
; CHECK-NEXT: id
%idn0 = call double @id_d(double %dn0)
-; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
-; CHECK-NEXT: orpd [[ONE]](%rip), %xmm0
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ONE]](%rip), %xmm0
%d1 = call double @copysign(double 1.000000e+00, double %idn0)
; CHECK-NEXT: id
%id1 = call double @id_d(double %d1)
-; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
-; CHECK-NEXT: orpd [[ONE]](%rip), %xmm0
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ONE]](%rip), %xmm0
%dn1 = call double @copysign(double -1.000000e+00, double %id1)
; CHECK-NEXT: id
Modified: llvm/trunk/test/CodeGen/X86/fp-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-logic.ll?rev=278090&r1=278089&r2=278090&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-logic.ll Mon Aug 8 22:06:33 2016
@@ -265,7 +265,7 @@ define float @movmsk(float %x) {
define double @bitcast_fabs(double %x) {
; CHECK-LABEL: bitcast_fabs:
; CHECK: # BB#0:
-; CHECK-NEXT: andpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
;
%bc1 = bitcast double %x to i64
Modified: llvm/trunk/test/CodeGen/X86/fp128-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp128-cast.ll?rev=278090&r1=278089&r2=278090&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp128-cast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp128-cast.ll Mon Aug 8 22:06:33 2016
@@ -351,8 +351,8 @@ cleanup:
;
; X64-LABEL: TestTruncCopysign:
; X64: callq __trunctfdf2
-; X64-NEXT: andpd {{.*}}, %xmm0
-; X64-NEXT: orpd {{.*}}, %xmm0
+; X64-NEXT: andps {{.*}}, %xmm0
+; X64-NEXT: orps {{.*}}, %xmm0
; X64-NEXT: callq __extenddftf2
; X64: retq
}
Modified: llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll?rev=278090&r1=278089&r2=278090&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-fcopysign.ll Mon Aug 8 22:06:33 2016
@@ -108,9 +108,9 @@ define double @int2(double %a, float %b,
; X64: # BB#0:
; X64-NEXT: addss %xmm2, %xmm1
; X64-NEXT: cvtss2sd %xmm1, %xmm1
-; X64-NEXT: andpd {{.*}}(%rip), %xmm1
-; X64-NEXT: andpd {{.*}}(%rip), %xmm0
-; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: andps {{.*}}(%rip), %xmm1
+; X64-NEXT: andps {{.*}}(%rip), %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
%tmp1 = fadd float %b, %c
%tmp2 = fpext float %tmp1 to double
Modified: llvm/trunk/test/CodeGen/X86/vec_fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fabs.ll?rev=278090&r1=278089&r2=278090&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fabs.ll Mon Aug 8 22:06:33 2016
@@ -7,12 +7,12 @@
define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X32-LABEL: fabs_v2f64:
; X32: # BB#0:
-; X32-NEXT: vandpd .LCPI0_0, %xmm0, %xmm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: fabs_v2f64:
; X64: # BB#0:
-; X64-NEXT: vandpd {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: retq
%t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
ret <2 x double> %t
@@ -22,7 +22,7 @@ declare <2 x double> @llvm.fabs.v2f64(<2
define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X32-LABEL: fabs_v4f32:
; X32: # BB#0:
-; X32-NEXT: vandps .LCPI1_0, %xmm0, %xmm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: fabs_v4f32:
@@ -37,12 +37,12 @@ declare <4 x float> @llvm.fabs.v4f32(<4
define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X32-LABEL: fabs_v4f64:
; X32: # BB#0:
-; X32-NEXT: vandpd .LCPI2_0, %ymm0, %ymm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: fabs_v4f64:
; X64: # BB#0:
-; X64-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT: retq
%t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
ret <4 x double> %t
@@ -52,7 +52,7 @@ declare <4 x double> @llvm.fabs.v4f64(<4
define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X32-LABEL: fabs_v8f32:
; X32: # BB#0:
-; X32-NEXT: vandps .LCPI3_0, %ymm0, %ymm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: fabs_v8f32:
More information about the llvm-commits mailing list