[llvm] r287171 - [x86] allow FP-logic ops when one operand is FP and result is FP
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 16 14:34:06 PST 2016
Author: spatel
Date: Wed Nov 16 16:34:05 2016
New Revision: 287171
URL: http://llvm.org/viewvc/llvm-project?rev=287171&view=rev
Log:
[x86] allow FP-logic ops when one operand is FP and result is FP
This saves a move between register files (integer <-> SSE). If there's any
CPU where the FP logic is slower, we could transform this back to int-logic
in MachineCombiner.
This helps with, but doesn't fully solve, PR6137:
https://llvm.org/bugs/show_bug.cgi?id=6137
The 'andn' test (FsANDNPSrr) shows that we're still missing a pattern match
to recognize an xor with a -1 constant as a 'not' op.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll
llvm/trunk/test/CodeGen/X86/fp-logic.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=287171&r1=287170&r2=287171&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Nov 16 16:34:05 2016
@@ -26971,11 +26971,10 @@ static SDValue combineBitcast(SDNode *N,
}
// Convert a bitcasted integer logic operation that has one bitcasted
- // floating-point operand and one constant operand into a floating-point
- // logic operation. This may create a load of the constant, but that is
- // cheaper than materializing the constant in an integer register and
- // transferring it to an SSE register or transferring the SSE operand to
- // integer register and back.
+ // floating-point operand into a floating-point logic operation. This may
+ // create a load of a constant, but that is cheaper than materializing the
+ // constant in an integer register and transferring it to an SSE register or
+ // transferring the SSE operand to integer register and back.
unsigned FPOpcode;
switch (N0.getOpcode()) {
case ISD::AND: FPOpcode = X86ISD::FAND; break;
@@ -26983,20 +26982,33 @@ static SDValue combineBitcast(SDNode *N,
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
}
- if (((Subtarget.hasSSE1() && VT == MVT::f32) ||
- (Subtarget.hasSSE2() && VT == MVT::f64)) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
- N0.getOperand(0).getOpcode() == ISD::BITCAST &&
- N0.getOperand(0).getOperand(0).getValueType() == VT) {
- SDValue N000 = N0.getOperand(0).getOperand(0);
- SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
- return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
+
+ if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
+ (Subtarget.hasSSE2() && VT == MVT::f64)))
+ return SDValue();
+
+ SDValue LogicOp0 = N0.getOperand(0);
+ SDValue LogicOp1 = N0.getOperand(1);
+ SDLoc DL0(N0);
+
+ // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
+ if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
+ LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
+ !isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
+ SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
+ return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
+ }
+ // bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
+ if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
+ LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
+ !isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
+ SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
+ return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
}
return SDValue();
}
-
// Match a binop + shuffle pyramid that represents a horizontal reduction over
// the elements of a vector.
// Returns the vector that is being reduced on, or SDValue() if a reduction
Modified: llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll?rev=287171&r1=287170&r2=287171&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-logic-replace.ll Wed Nov 16 16:34:05 2016
@@ -29,20 +29,16 @@ define double @FsANDPSrr(double %x, doub
define double @FsANDNPSrr(double %x, double %y) {
; SSE-LABEL: FsANDNPSrr:
; SSE: # BB#0:
-; SSE-NEXT: movd %xmm0, %rax
-; SSE-NEXT: movd %xmm1, %rcx
-; SSE-NEXT: notq %rcx
-; SSE-NEXT: andq %rax, %rcx
-; SSE-NEXT: movd %rcx, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-NEXT: xorpd %xmm1, %xmm2
+; SSE-NEXT: andpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: FsANDNPSrr:
; AVX: # BB#0:
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: vmovq %xmm1, %rcx
-; AVX-NEXT: notq %rcx
-; AVX-NEXT: andq %rax, %rcx
-; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT: vxorpd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
%bc1 = bitcast double %x to i64
Modified: llvm/trunk/test/CodeGen/X86/fp-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-logic.ll?rev=287171&r1=287170&r2=287171&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-logic.ll Wed Nov 16 16:34:05 2016
@@ -3,13 +3,9 @@
; PR22428: https://llvm.org/bugs/show_bug.cgi?id=22428
; f1, f2, f3, and f4 should use an integer logic instruction.
-; f9 and f10 should use an FP (SSE) logic instruction.
+; f5, f6, f9, and f10 should use an FP (SSE) logic instruction.
;
-; f5, f6, f7, and f8 are less clear.
-;
-; For f5 and f6, we can save a register move by using an FP logic instruction,
-; but we may need to calculate the relative costs of an SSE op vs. int op vs.
-; scalar <-> SSE register moves.
+; f7 and f8 are less clear.
;
; For f7 and f8, the SSE instructions don't take immediate operands, so if we
; use one of those, we either have to load a constant from memory or move the
@@ -79,9 +75,8 @@ define i32 @f4(float %x) {
define float @f5(float %x, i32 %y) {
; CHECK-LABEL: f5:
; CHECK: # BB#0:
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: andl %edi, %eax
-; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movd %edi, %xmm1
+; CHECK-NEXT: andps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%bc1 = bitcast float %x to i32
@@ -95,9 +90,8 @@ define float @f5(float %x, i32 %y) {
define float @f6(float %x, i32 %y) {
; CHECK-LABEL: f6:
; CHECK: # BB#0:
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: andl %edi, %eax
-; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movd %edi, %xmm1
+; CHECK-NEXT: andps %xmm1, %xmm0
; CHECK-NEXT: retq
;
%bc1 = bitcast float %x to i32
More information about the llvm-commits mailing list