[PATCH] D24221: AVX512F: FMA intrinsic + FNEG - sequence optimization

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 4 16:31:34 PDT 2016


spatel added inline comments.

================
Comment at: ../lib/Target/X86/X86ISelLowering.cpp:30304
@@ +30303,3 @@
+///
+/// AVX512F does not has FXOR, so FNEG is lowered as
+/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
----------------
has -> have

================
Comment at: ../lib/Target/X86/X86ISelLowering.cpp:30359-30361
@@ +30358,5 @@
+  SDValue Arg;
+  bool IsFNEGNode = isFNEG(N, &Arg);
+  if (!IsFNEGNode)
+    llvm_unreachable("Expected FNEG node");
+ 
----------------
Can this be an assert?
assert(IsFNEGNode && "Expected FNEG node"); 

================
Comment at: ../lib/Target/X86/X86ISelLowering.cpp:30385-30422
@@ -30347,28 +30384,40 @@
   if (Arg.hasOneUse()) {
     switch (Arg.getOpcode()) {
     case X86ISD::FMADD:
-      return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
-                         Arg.getOperand(1), Arg.getOperand(2));
+      return DAG.getBitcast(OrigVT,
+                            DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+                                        Arg.getOperand(1), Arg.getOperand(2)));
     case X86ISD::FMSUB:
-      return DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
-                         Arg.getOperand(1), Arg.getOperand(2));
+      return DAG.getBitcast(OrigVT,
+                            DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
+                                        Arg.getOperand(1), Arg.getOperand(2)));
     case X86ISD::FNMADD:
-      return DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
-                         Arg.getOperand(1), Arg.getOperand(2));
+      return DAG.getBitcast(OrigVT,
+                            DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
+                                        Arg.getOperand(1), Arg.getOperand(2)));
     case X86ISD::FNMSUB:
-      return DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
-                         Arg.getOperand(1), Arg.getOperand(2));
+      return DAG.getBitcast(OrigVT,
+                            DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
+                                        Arg.getOperand(1), Arg.getOperand(2)));
     case X86ISD::FMADD_RND:
-      return DAG.getNode(X86ISD::FNMSUB_RND, DL, VT, Arg.getOperand(0),
-                         Arg.getOperand(1), Arg.getOperand(2), Arg.getOperand(3));
+      return DAG.getBitcast(OrigVT,
+                            DAG.getNode(X86ISD::FNMSUB_RND, DL, VT,
+                                        Arg.getOperand(0), Arg.getOperand(1),
+                                        Arg.getOperand(2), Arg.getOperand(3)));
     case X86ISD::FMSUB_RND:
-      return DAG.getNode(X86ISD::FNMADD_RND, DL, VT, Arg.getOperand(0),
-                         Arg.getOperand(1), Arg.getOperand(2), Arg.getOperand(3));
+      return DAG.getBitcast(OrigVT,
+                            DAG.getNode(X86ISD::FNMADD_RND, DL, VT,
+                                        Arg.getOperand(0), Arg.getOperand(1),
+                                        Arg.getOperand(2), Arg.getOperand(3)));
     case X86ISD::FNMADD_RND:
-      return DAG.getNode(X86ISD::FMSUB_RND, DL, VT, Arg.getOperand(0),
-                         Arg.getOperand(1), Arg.getOperand(2), Arg.getOperand(3));
+      return DAG.getBitcast(OrigVT,
+                            DAG.getNode(X86ISD::FMSUB_RND, DL, VT,
+                                        Arg.getOperand(0), Arg.getOperand(1),
+                                        Arg.getOperand(2), Arg.getOperand(3)));
     case X86ISD::FNMSUB_RND:
-      return DAG.getNode(X86ISD::FMADD_RND, DL, VT, Arg.getOperand(0),
-                         Arg.getOperand(1), Arg.getOperand(2), Arg.getOperand(3));
+      return DAG.getBitcast(OrigVT,
+                            DAG.getNode(X86ISD::FMADD_RND, DL, VT,
+                                        Arg.getOperand(0), Arg.getOperand(1),
+                                        Arg.getOperand(2), Arg.getOperand(3)));
     }
   }
----------------
This would be clearer/smaller if we switched just the opcode selection and sank the common 'getBitcast(getNode())' after the switch:

  auto NewOpcode;
  switch (Arg.getOpcode()) {
  case X86ISD::FMADD: NewOpcode = X86ISD::FNMSUB; break;
  case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break;
  ...
  }
  return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg.getNode()->ops());

If you do that refactoring on the original code as an NFC patch, I think it will make the diff on this part of this patch just one line of code.


Repository:
  rL LLVM

https://reviews.llvm.org/D24221





More information about the llvm-commits mailing list