[PATCH] D24221: AVX512F: FMA intrinsic + FNEG - sequence optimization
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 4 16:31:34 PDT 2016
spatel added inline comments.
================
Comment at: ../lib/Target/X86/X86ISelLowering.cpp:30304
@@ +30303,3 @@
+///
+/// AVX512F does not has FXOR, so FNEG is lowered as
+/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
----------------
has -> have
================
Comment at: ../lib/Target/X86/X86ISelLowering.cpp:30359-30361
@@ +30358,5 @@
+ SDValue Arg;
+ bool IsFNEGNode = isFNEG(N, &Arg);
+ if (!IsFNEGNode)
+ llvm_unreachable("Expected FNEG node");
+
----------------
Can this be an assert?
assert(IsFNEGNode && "Expected FNEG node");
================
Comment at: ../lib/Target/X86/X86ISelLowering.cpp:30385-30422
@@ -30347,28 +30384,40 @@
if (Arg.hasOneUse()) {
switch (Arg.getOpcode()) {
case X86ISD::FMADD:
- return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2));
+ return DAG.getBitcast(OrigVT,
+ DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2)));
case X86ISD::FMSUB:
- return DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2));
+ return DAG.getBitcast(OrigVT,
+ DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2)));
case X86ISD::FNMADD:
- return DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2));
+ return DAG.getBitcast(OrigVT,
+ DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2)));
case X86ISD::FNMSUB:
- return DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2));
+ return DAG.getBitcast(OrigVT,
+ DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2)));
case X86ISD::FMADD_RND:
- return DAG.getNode(X86ISD::FNMSUB_RND, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2), Arg.getOperand(3));
+ return DAG.getBitcast(OrigVT,
+ DAG.getNode(X86ISD::FNMSUB_RND, DL, VT,
+ Arg.getOperand(0), Arg.getOperand(1),
+ Arg.getOperand(2), Arg.getOperand(3)));
case X86ISD::FMSUB_RND:
- return DAG.getNode(X86ISD::FNMADD_RND, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2), Arg.getOperand(3));
+ return DAG.getBitcast(OrigVT,
+ DAG.getNode(X86ISD::FNMADD_RND, DL, VT,
+ Arg.getOperand(0), Arg.getOperand(1),
+ Arg.getOperand(2), Arg.getOperand(3)));
case X86ISD::FNMADD_RND:
- return DAG.getNode(X86ISD::FMSUB_RND, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2), Arg.getOperand(3));
+ return DAG.getBitcast(OrigVT,
+ DAG.getNode(X86ISD::FMSUB_RND, DL, VT,
+ Arg.getOperand(0), Arg.getOperand(1),
+ Arg.getOperand(2), Arg.getOperand(3)));
case X86ISD::FNMSUB_RND:
- return DAG.getNode(X86ISD::FMADD_RND, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2), Arg.getOperand(3));
+ return DAG.getBitcast(OrigVT,
+ DAG.getNode(X86ISD::FMADD_RND, DL, VT,
+ Arg.getOperand(0), Arg.getOperand(1),
+ Arg.getOperand(2), Arg.getOperand(3)));
}
}
----------------
This would be clearer/smaller if we switched just the opcode selection and sank the common 'getBitcast(getNode())' after the switch:
auto NewOpcode;
switch (Arg.getOpcode()) {
case X86ISD::FMADD: NewOpcode = X86ISD::FNMSUB; break;
case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break;
...
}
return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg.getNode()->ops());
If you do that refactoring on the original code as an NFC patch, I think it will make the diff on this part of this patch just one line of code.
Repository:
rL LLVM
https://reviews.llvm.org/D24221
More information about the llvm-commits
mailing list