[llvm] r333221 - [InstCombine] Enable more reassociations using FMF 'reassoc' + 'nsz'

Thu May 24 13:16:43 PDT 2018

Author: wristow
Date: Thu May 24 13:16:43 2018
New Revision: 333221

URL: http://llvm.org/viewvc/llvm-project?rev=333221&view=rev
Log:
[InstCombine] Enable more reassociations using FMF 'reassoc' + 'nsz'

Reassociation of math ops in some contexts (especially vector contexts)
has generally only been happening when the 'fast' FMF was set.  This
enables reassociation when only the finer-grained controls 'reassoc' and
'nsz' are set.

Differential Revision: https://reviews.llvm.org/D47335

Modified:
    llvm/trunk/lib/IR/Instruction.cpp
    llvm/trunk/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll

Modified: llvm/trunk/lib/IR/Instruction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Instruction.cpp?rev=333221&r1=333220&r2=333221&view=diff
==============================================================================

--- llvm/trunk/lib/IR/Instruction.cpp (original)
+++ llvm/trunk/lib/IR/Instruction.cpp Thu May 24 13:16:43 2018
@@ -602,7 +602,8 @@ bool Instruction::isAssociative() const
   switch (Opcode) {
   case FMul:
   case FAdd:
-    return cast<FPMathOperator>(this)->isFast();
+    return cast<FPMathOperator>(this)->hasAllowReassoc() &&
+           cast<FPMathOperator>(this)->hasNoSignedZeros();
   default:
     return false;
   }

Modified: llvm/trunk/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll?rev=333221&r1=333220&r2=333221&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll Thu May 24 13:16:43 2018
@@ -1,10 +1,145 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK: mul
-; CHECK: mul
 
-define <4 x float> @test(<4 x float> %V) {
-        %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >                ; <<4 x float>> [#uses=1]
-        %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >               ; <<4 x float>> [#uses=1]
+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fmul(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul(
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+        %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
         ret <4 x float> %Z
 }
 
+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this folds with 'fast'
+define <4 x float> @test_fmul_fast(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_fast(
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %Y = fmul fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fmul fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V * C1) * C2 => V * (C1 * C2)
+; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
+define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_reassoc_nsz(
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %Y = fmul reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fmul reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V * C1) * C2 => V * (C1 * C2)
+; TODO: This doesn't require 'nsz'.  It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
+define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
+; CHECK-LABEL: @test_fmul_reassoc(
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+        %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fadd(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+        %Y = fadd <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fadd <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this folds with 'fast'
+define <4 x float> @test_fadd_fast(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_fast(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %Y = fadd fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fadd fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required)
+define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_reassoc_nsz(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %Y = fadd reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fadd reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; (V + C1) + C2 => V + (C1 + C2)
+; TODO: This doesn't require 'nsz'.  It should fold to V + { 2.0, 4.0, 0.0, 8.0 }
+define <4 x float> @test_fadd_reassoc(<4 x float> %V) {
+; CHECK-LABEL: @test_fadd_reassoc(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+        %Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 >
+        ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this doesn't fold when no fast-math-flags are specified
+define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
+; CHECK-NEXT:     [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:     ret <4 x float> [[TMP3]]
+        %X = fadd <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Y = fadd <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+        %Z = fadd <4 x float> %X, %Y
+        ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this folds to 'A + B' with 'fast'
+define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_fast(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %X = fadd fast <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Y = fadd fast <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+        %Z = fadd fast <4 x float> %X, %Y
+        ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required)
+define <4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
+        %X = fadd reassoc nsz <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Y = fadd reassoc nsz <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+        %Z = fadd reassoc nsz <4 x float> %X, %Y
+        ret <4 x float> %Z
+}
+
+; ( A + C1 ) + ( B + -C1 )
+; Verify the fold is not done with only 'reassoc' ('nsz' is required).
+define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @test_fadds_cancel_reassoc(
+; CHECK-NEXT:     [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+; CHECK-NEXT:     [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00>
+; CHECK-NEXT:     [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:     ret <4 x float> [[TMP3]]
+        %X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
+        %Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 >
+        %Z = fadd reassoc <4 x float> %X, %Y
+        ret <4 x float> %Z
+}