[llvm] 786c687 - [AArch64] Add support for FMA intrinsics to shouldSinkOperands.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri May 27 02:37:21 PDT 2022


Author: Florian Hahn
Date: 2022-05-27T10:37:03+01:00
New Revision: 786c687810a5e3db4c64312018de25c65527c40c

URL: https://github.com/llvm/llvm-project/commit/786c687810a5e3db4c64312018de25c65527c40c
DIFF: https://github.com/llvm/llvm-project/commit/786c687810a5e3db4c64312018de25c65527c40c.diff

LOG: [AArch64] Add support for FMA intrinsics to shouldSinkOperands.

If the fma operates on a legal vector type, the indexed variants can be
used, if the second operand is a splat of a valid index.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D126234

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e31a58da08313..d31008496ea4f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12545,6 +12545,11 @@ bool AArch64TargetLowering::shouldSinkOperands(
       }
       LLVM_FALLTHROUGH;
 
+    case Intrinsic::fma:
+      if (cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
+          !Subtarget->hasFullFP16())
+        return false;
+      LLVM_FALLTHROUGH;
     case Intrinsic::aarch64_neon_sqdmull:
     case Intrinsic::aarch64_neon_sqdmulh:
     case Intrinsic::aarch64_neon_sqrdmulh:
@@ -12568,7 +12573,6 @@ bool AArch64TargetLowering::shouldSinkOperands(
       Ops.push_back(&II->getArgOperandUse(0));
       Ops.push_back(&II->getArgOperandUse(1));
       return true;
-
     default:
       return false;
     }

diff  --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll
index 5d7a26f657845..fc60b119225c4 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -codegenprepare -S | FileCheck %s
+; RUN: opt < %s -codegenprepare -S | FileCheck --check-prefixes=CHECK,NOFP16 %s
+; RUN: opt < %s -codegenprepare -S -mattr=+fullfp16 | FileCheck --check-prefixes=CHECK,FULLFP16 %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown"
@@ -498,29 +499,53 @@ if.else:
 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
 
 define <8 x half> @sink_shufflevector_fma_v8f16(i1 %c, <8 x half> %a, <8 x half> %b) {
-; CHECK-LABEL: @sink_shufflevector_fma_v8f16(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:    [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT:    [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]])
-; CHECK-NEXT:    [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]])
-; CHECK-NEXT:    [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]])
-; CHECK-NEXT:    [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]])
-; CHECK-NEXT:    ret <8 x half> [[R_3]]
-; CHECK:       if.else:
-; CHECK-NEXT:    [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]])
-; CHECK-NEXT:    [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]])
-; CHECK-NEXT:    [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]])
-; CHECK-NEXT:    [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> [[B]])
-; CHECK-NEXT:    ret <8 x half> [[R_7]]
+; NOFP16-LABEL: @sink_shufflevector_fma_v8f16(
+; NOFP16-NEXT:  entry:
+; NOFP16-NEXT:    [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; NOFP16-NEXT:    [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; NOFP16-NEXT:    [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; NOFP16-NEXT:    [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; NOFP16-NEXT:    [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+; NOFP16-NEXT:    [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; NOFP16-NEXT:    [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+; NOFP16-NEXT:    [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; NOFP16-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; NOFP16:       if.then:
+; NOFP16-NEXT:    [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]])
+; NOFP16-NEXT:    [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]])
+; NOFP16-NEXT:    [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]])
+; NOFP16-NEXT:    [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]])
+; NOFP16-NEXT:    ret <8 x half> [[R_3]]
+; NOFP16:       if.else:
+; NOFP16-NEXT:    [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]])
+; NOFP16-NEXT:    [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]])
+; NOFP16-NEXT:    [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]])
+; NOFP16-NEXT:    [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> [[B]])
+; NOFP16-NEXT:    ret <8 x half> [[R_7]]
+;
+; FULLFP16-LABEL: @sink_shufflevector_fma_v8f16(
+; FULLFP16-NEXT:  entry:
+; FULLFP16-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; FULLFP16:       if.then:
+; FULLFP16-NEXT:    [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; FULLFP16-NEXT:    [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[B]])
+; FULLFP16-NEXT:    [[TMP1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; FULLFP16-NEXT:    [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[TMP1]], <8 x half> [[B]])
+; FULLFP16-NEXT:    [[TMP2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; FULLFP16-NEXT:    [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[TMP2]], <8 x half> [[B]])
+; FULLFP16-NEXT:    [[TMP3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; FULLFP16-NEXT:    [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[TMP3]], <8 x half> [[B]])
+; FULLFP16-NEXT:    ret <8 x half> [[R_3]]
+; FULLFP16:       if.else:
+; FULLFP16-NEXT:    [[TMP4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+; FULLFP16-NEXT:    [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[TMP4]], <8 x half> [[B]])
+; FULLFP16-NEXT:    [[TMP5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; FULLFP16-NEXT:    [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[TMP5]], <8 x half> [[B]])
+; FULLFP16-NEXT:    [[TMP6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+; FULLFP16-NEXT:    [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[TMP6]], <8 x half> [[B]])
+; FULLFP16-NEXT:    [[TMP7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FULLFP16-NEXT:    [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[TMP7]], <8 x half> [[B]])
+; FULLFP16-NEXT:    ret <8 x half> [[R_7]]
 ;
 entry:
   %s0 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer
@@ -553,18 +578,18 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
 define <4 x float> @sink_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @sink_shufflevector_fma_v4f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S0]], <4 x float> [[B]])
-; CHECK-NEXT:    [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_0]], <4 x float> [[S1]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_0]], <4 x float> [[TMP1]], <4 x float> [[B]])
 ; CHECK-NEXT:    ret <4 x float> [[R_1]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[S2]], <4 x float> [[B]])
-; CHECK-NEXT:    [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_2]], <4 x float> [[S3]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[TMP2]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_2]], <4 x float> [[TMP3]], <4 x float> [[B]])
 ; CHECK-NEXT:    ret <4 x float> [[R_3]]
 ;
 entry:
@@ -588,18 +613,18 @@ if.else:
 define <4 x float> @sink_shufflevector_first_arg_fma_v4f3(i1 %c, <8 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @sink_shufflevector_first_arg_fma_v4f3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S0]], <4 x float> [[B:%.*]], <4 x float> [[B]])
-; CHECK-NEXT:    [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S1]], <4 x float> [[R_0]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP0]], <4 x float> [[B:%.*]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1]], <4 x float> [[R_0]], <4 x float> [[B]])
 ; CHECK-NEXT:    ret <4 x float> [[R_1]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S2]], <4 x float> [[B]], <4 x float> [[B]])
-; CHECK-NEXT:    [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[S3]], <4 x float> [[R_2]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP2]], <4 x float> [[B]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP3]], <4 x float> [[R_2]], <4 x float> [[B]])
 ; CHECK-NEXT:    ret <4 x float> [[R_3]]
 ;
 entry:
@@ -627,14 +652,14 @@ declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
 define <2 x double> @sink_shufflevector_fma_v2f64(i1 %c, <2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @sink_shufflevector_fma_v2f64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[S0]], <2 x double> [[B]])
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[TMP0]], <2 x double> [[B]])
 ; CHECK-NEXT:    ret <2 x double> [[R_0]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[R_1:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[S1]], <2 x double> [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[R_1:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[TMP1]], <2 x double> [[B]])
 ; CHECK-NEXT:    ret <2 x double> [[R_1]]
 ;
 entry:
@@ -654,10 +679,10 @@ if.else:
 define <4 x float> @do_not_sink_out_of_range_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @do_not_sink_out_of_range_shufflevector_fma_v4f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S4:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[R:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[S4]], <4 x float> [[B]])
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]])
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret <4 x float> zeroinitializer
@@ -679,20 +704,20 @@ declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>)
 define <5 x float> @sink_shufflevector_fma_v5f32(i1 %c, <8 x float> %a, <5 x float> %b) {
 ; CHECK-LABEL: @sink_shufflevector_fma_v5f32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> zeroinitializer
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
 ; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
 ; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
-; CHECK-NEXT:    [[S4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B:%.*]], <5 x float> [[S0]], <5 x float> [[B]])
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> zeroinitializer
+; CHECK-NEXT:    [[R_0:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B:%.*]], <5 x float> [[TMP0]], <5 x float> [[B]])
 ; CHECK-NEXT:    [[R_1:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_0]], <5 x float> [[S1]], <5 x float> [[B]])
 ; CHECK-NEXT:    ret <5 x float> [[R_1]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    [[R_2:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B]], <5 x float> [[S2]], <5 x float> [[B]])
 ; CHECK-NEXT:    [[R_3:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_2]], <5 x float> [[S3]], <5 x float> [[B]])
-; CHECK-NEXT:    [[R_4:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_3]], <5 x float> [[S4]], <5 x float> [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[R_4:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_3]], <5 x float> [[TMP1]], <5 x float> [[B]])
 ; CHECK-NEXT:    ret <5 x float> [[R_4]]
 ;
 entry:


        


More information about the llvm-commits mailing list