[PATCH] D4583: don't transform splats of vector FP (PR20358)

Sat Nov 21 10:21:34 PST 2015

spatel updated the summary for this revision.
spatel added a reviewer: RKSimon.
spatel updated this revision to Diff 40871.
spatel added a comment.

Patch updated:
It's likely that denorms are not a concern in a -ffast-math setting, so limit this patch to only fire (ie, bail out of the transform) on FP ops that are not 'fast'.

As noted in the bug report, there are now known security exploits related to denorm execution (!), so there's another reason for this patch besides the potentially giant performance loss:
http://cseweb.ucsd.edu/~dkohlbre/papers/subnormal.pdf

Also - and I may be going for a world-record here - it's been ~490 days, so...

Ping. :)


http://reviews.llvm.org/D4583

Files:
  lib/Transforms/InstCombine/InstructionCombining.cpp
  test/Transforms/InstCombine/vec_shuffle.ll

Index: test/Transforms/InstCombine/vec_shuffle.ll
===================================================================

--- test/Transforms/InstCombine/vec_shuffle.ll
+++ test/Transforms/InstCombine/vec_shuffle.ll
@@ -310,11 +310,30 @@
   ret <4 x i32> %r
 }
 
+; If the FP operation is 'fast', hoist it to eliminate a shuffle.
+ 
 define <4 x float> @shuffle_17fsub(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
 ; CHECK-LABEL: @shuffle_17fsub(
-; CHECK-NOT: shufflevector
-; CHECK: fsub <4 x float> %v1, %v2
-; CHECK: shufflevector
+; CHECK-NEXT: fsub <4 x float> %v1, %v2
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret <4 x float>
+  %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer,
+                      <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer,
+                      <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  %r = fsub fast <4 x float> %t1, %t2
+  ret <4 x float> %r
+}
+
+; If the FP operation is not 'fast', do not risk operating on denormals:
+; https://llvm.org/bugs/show_bug.cgi?id=20358
+
+define <4 x float> @pr20358(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
+; CHECK-LABEL: @pr20358(
+; CHECK-NEXT:  %t1 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT:  %t2 = shufflevector <4 x float> %v2, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT:  %r = fsub <4 x float> %t1, %t2
+; CHECK-NEXT:  ret <4 x float> %r
   %t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer,
                       <4 x i32> <i32 1, i32 2, i32 3, i32 0>
   %t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer,
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1245,6 +1245,7 @@
 /// specified one but with other operands.
 static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
                                  InstCombiner::BuilderTy *B) {
+  // FIXME: Propagate fast-math-flags.
   Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
   if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) {
     if (isa<OverflowingBinaryOperator>(NewBO)) {
@@ -1270,6 +1271,20 @@
   if (!isSafeToSpeculativelyExecute(&Inst))
     return nullptr;
 
+  // The shuffle transformations below may create floating-point math operations
+  // on values not specified by the program. Those values may include denormals.
+  // Operating on denormals may be extremely expensive or dangerous (PR20358).
+  // If unsafe algebra is not allowed, bail out to avoid that possibility.
+
+  // TODO: There is no direct connection between unsafe algebra and denormal
+  // handling, but it is likely that an unsafe FP environment is also an
+  // environment where denormals are automatically flushed to zero. If support
+  // for detecting/changing the FP environment is added, this check should be
+  // improved to directly query the settings for denormals.
+  
+  if (isa<FPMathOperator>(Inst) && !Inst.hasUnsafeAlgebra())
+    return nullptr;
+
   unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
   Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
   assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D4583.40871.patch
Type: text/x-patch
Size: 3453 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151121/1d191498/attachment.bin>