[llvm] ac47339 - [SystemZ] Fix 128-bit strict FMA expansion pre-z14

Wed Dec 11 07:32:34 PST 2019

Author: Ulrich Weigand
Date: 2019-12-11T16:32:08+01:00
New Revision: ac473394ff04ac3e33c15b8358b68a78834b8424

URL: https://github.com/llvm/llvm-project/commit/ac473394ff04ac3e33c15b8358b68a78834b8424
DIFF: https://github.com/llvm/llvm-project/commit/ac473394ff04ac3e33c15b8358b68a78834b8424.diff

LOG: [SystemZ] Fix 128-bit strict FMA expansion pre-z14

Processors before z14 have no FMA instruction for 128-bit
floating-point, so the @llvm.fma.f128 intrinsic must be
expanded to a libcall (fmal) on those platforms.

This worked correctly for regular FMA, but the expansion was
implemented incorrectly for the strict version (ISD::STRICT_FMA,
i.e. @llvm.experimental.constrained.fma.f128).  This was not
noticed because we did not have test coverage for this case.

This patch fixes that incorrect expansion and adds the
missing test cases.

Added: 
    llvm/test/CodeGen/SystemZ/fp-mul-13.ll
    llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll
    llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll

Modified: 
    llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 36f460619298..25690a2ef73c 100644

--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -570,13 +570,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     }
   }
 
-  // We have fused multiply-addition for f32 and f64 but not f128.
-  setOperationAction(ISD::FMA, MVT::f32,  Legal);
-  setOperationAction(ISD::FMA, MVT::f64,  Legal);
-  if (Subtarget.hasVectorEnhancements1())
-    setOperationAction(ISD::FMA, MVT::f128, Legal);
-  else
+  // We only have fused f128 multiply-addition on vector registers.
+  if (!Subtarget.hasVectorEnhancements1()) {
     setOperationAction(ISD::FMA, MVT::f128, Expand);
+    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
+  }
 
   // We don't have a copysign instruction on vector registers.
   if (Subtarget.hasVectorEnhancements1())

diff  --git a/llvm/test/CodeGen/SystemZ/fp-mul-13.ll b/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
new file mode 100644
index 000000000000..4475195cc547
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, fmal
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %f3 = load fp128, fp128 *%ptr3
+  %res = call fp128 @llvm.fma.f128 (fp128 %f1, fp128 %f2, fp128 %f3)
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+

diff  --git a/llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll b/llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll
new file mode 100644
index 000000000000..3af5efb6c9c8
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-mul-12.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4)
+; CHECK: wfmaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]]
+; CHECK: vst [[RES]], 0(%r5)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %f3 = load fp128, fp128 *%ptr3
+  %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+                        fp128 %f1, fp128 %f2, fp128 %f3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+define void @f2(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4)
+; CHECK: wfmsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]]
+; CHECK: vst [[RES]], 0(%r5)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %f3 = load fp128, fp128 *%ptr3
+  %neg = fsub fp128 0xL00000000000000008000000000000000, %f3
+  %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+                        fp128 %f1, fp128 %f2, fp128 %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+define void @f3(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f3:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4)
+; CHECK: wfnmaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]]
+; CHECK: vst [[RES]], 0(%r5)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %f3 = load fp128, fp128 *%ptr3
+  %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+                        fp128 %f1, fp128 %f2, fp128 %f3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  %negres = fsub fp128 0xL00000000000000008000000000000000, %res
+  store fp128 %negres, fp128 *%dst
+  ret void
+}
+
+define void @f4(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f4:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK-DAG: vl [[REG3:%v[0-9]+]], 0(%r4)
+; CHECK: wfnmsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], [[REG3]]
+; CHECK: vst [[RES]], 0(%r5)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %f3 = load fp128, fp128 *%ptr3
+  %neg = fsub fp128 0xL00000000000000008000000000000000, %f3
+  %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+                        fp128 %f1, fp128 %f2, fp128 %neg,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  %negres = fsub fp128 0xL00000000000000008000000000000000, %res
+  store fp128 %negres, fp128 *%dst
+  ret void
+}
+
+attributes #0 = { strictfp }
+

diff  --git a/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll b/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
new file mode 100644
index 000000000000..32f609bb26eb
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2, fp128 *%ptr3, fp128 *%dst) #0 {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, fmal
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %f3 = load fp128, fp128 *%ptr3
+  %res = call fp128 @llvm.experimental.constrained.fma.f128 (
+                        fp128 %f1, fp128 %f2, fp128 %f3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict") #0
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+attributes #0 = { strictfp }
+