[clang] [llvm] [SystemZ] Add support for half (fp16) (PR #109164)

Wed Sep 18 08:45:46 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-systemz

Author: Jonas Paulsson (JonPsson1)

<details>
<summary>Changes</summary>

Make sure that fp16<=>float conversions are expanded to libcalls and that 16-bit fp values can be loaded and stored properly via GPRs. With this patch the Half IR Type used in operations should be handled correctly with the help of pre-existing ISD node expansions. 

Patch in progress...

Notes:
```

`Clang FE:

TargetInfo {
  /// Determine whether the _Float16 type is supported on this target.
  bool HasFloat16;
  ; If false gives an error message on _Float16 in C program.

  bool HasLegalHalfType; // True if the backend supports operations on the half
                         // LLVM IR type. 
  ; If false, Half:s are extended and ops are done in float, if true, ops are
  ; done in Half (by Clang). 

  -ffloat16-excess-precision=[standard,fast,none]
  "Allows control over excess precision on targets where native support for the
   precision types is not available. By default, excess precision is used to
   calculate intermediate results following the rules specified in ISO C99."
  ; => Even though we need to deal with Half operations coming from other
       languages in the BE, we still should to let Clang insert the required
       emulation (trunc/extend) instructions as required by C (_Float16). So
       HasLegalHalfType needs to be set to 'false'.
  ; => C code will have fpext/fptrunc inserted in many places to emulate
       _Float16, and operations are done in Float.
  ; => Other languages will emit Half operations, which has to be emulated by
       fpext/fptrunc in BE and then done in Float.

  /// Check whether llvm intrinsics such as llvm.convert.to.fp16 should be used
  /// to convert to and from __fp16.
  /// FIXME: This function should be removed once all targets stop using the
  /// conversion intrinsics.
  virtual bool useFP16ConversionIntrinsics() const {
    return true;
  }
  ; Use either conversion intrinsics or fpext/fptrunc from Clang.
  ; => Given the comment and the fact that other languages emit 'half' it
  ;    seems ideal to not use these.

  bool HalfArgsAndReturns;
  ; Should be true if ABI says that half values are passed / returned.
  ; - What does the SystemZ ABI require? Pass/return in float regs?
}

Middle End:
 ; Middle-End does not do much especially with half:s/conversion intrinsics it
   seems (some constant folding).

  ; InstCombiner removed an fptrunc before sub and converted the Float fsub
    to a Half fsub. => Middle end does not (at least currently) seem to care
    about the Clang HasLegalHalfType flag.

CodeGen:
  ; Common-code expansions available:
  ; The expansion of ISD::FP16_TO_FP / FP_TO_FP16 generates libcalls.
  ; The expansion of extloads/truncstores handles these as integer values
    in conjunction with the libcalls.

  ; Library calls:
    LLVM libcalls: llvm/include/llvm/IR/RuntimeLibcalls.def
    got 'undefined reference' from linker at first try...

  Conversions:
   - could NNP instructions (z16) be used (vcfn / vcnf)?
     (clang/test/CodeGen/SystemZ/builtins-systemz-zvector4.c)

- There are also corresponding strict fp nodes that probably should be handled
   as well just the same.

- The exact semantics of _Float16 in C is hopefully handled by Clang FE per the value of -ffloat16-excess-precision.
`
```

---
Full diff: https://github.com/llvm/llvm-project/pull/109164.diff


3 Files Affected:

- (modified) clang/lib/Basic/Targets/SystemZ.h (+9) 
- (modified) llvm/lib/Target/SystemZ/SystemZISelLowering.cpp (+7) 
- (added) llvm/test/CodeGen/SystemZ/fp-half.ll (+100) 


``````````diff

diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index f05ea473017bec..6566b63d4587ee 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -91,11 +91,20 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
                       "-v128:64-a:8:16-n32:64");
     }
     MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 128;
+
+    HasLegalHalfType = false;    // Default=false
+    HalfArgsAndReturns = false;  // Default=false
+    HasFloat16 = true;           // Default=false
+
     HasStrictFP = true;
   }
 
   unsigned getMinGlobalAlign(uint64_t Size, bool HasNonWeakDef) const override;
 
+  bool useFP16ConversionIntrinsics() const override {
+    return false;
+  }
+
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 582a8c139b2937..fd3dcebba1eca7 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -704,6 +704,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::BITCAST, MVT::f32, Custom);
   }
 
+  // Expand FP16 <=> FP32 conversions to libcalls and handle FP16 loads and
+  // stores in GPRs.
+  setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+
   // VASTART and VACOPY need to deal with the SystemZ-specific varargs
   // structure, but VAEND is a no-op.
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
diff --git a/llvm/test/CodeGen/SystemZ/fp-half.ll b/llvm/test/CodeGen/SystemZ/fp-half.ll
new file mode 100644
index 00000000000000..393ba2f620ff6e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-half.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+;
+; Tests for FP16 (Half).
+
+; A function where everything is done in Half.
+define void @fun0(ptr %Op0, ptr %Op1, ptr %Dst) {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -168
+; CHECK-NEXT:    .cfi_def_cfa_offset 328
+; CHECK-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset %f8, -168
+; CHECK-NEXT:    llgh %r2, 0(%r2)
+; CHECK-NEXT:    lgr %r13, %r4
+; CHECK-NEXT:    lgr %r12, %r3
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee at PLT
+; CHECK-NEXT:    llgh %r2, 0(%r12)
+; CHECK-NEXT:    ler %f8, %f0
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee at PLT
+; CHECK-NEXT:    aebr %f0, %f8
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee at PLT
+; CHECK-NEXT:    sth %r2, 0(%r13)
+; CHECK-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT:    lmg %r12, %r15, 264(%r15)
+; CHECK-NEXT:    br %r14
+entry:
+  %0 = load half, ptr %Op0, align 2
+  %1 = load half, ptr %Op1, align 2
+  %add = fadd half %0, %1
+  store half %add, ptr %Dst, align 2
+  ret void
+}
+
+; A function where Half values are loaded and extended to float and then
+; operated on.
+define void @fun1(ptr %Op0, ptr %Op1, ptr %Dst) {
+; CHECK-LABEL: fun1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
+; CHECK-NEXT:    .cfi_offset %r12, -64
+; CHECK-NEXT:    .cfi_offset %r13, -56
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -168
+; CHECK-NEXT:    .cfi_def_cfa_offset 328
+; CHECK-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset %f8, -168
+; CHECK-NEXT:    llgh %r2, 0(%r2)
+; CHECK-NEXT:    lgr %r13, %r4
+; CHECK-NEXT:    lgr %r12, %r3
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee at PLT
+; CHECK-NEXT:    llgh %r2, 0(%r12)
+; CHECK-NEXT:    ler %f8, %f0
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee at PLT
+; CHECK-NEXT:    aebr %f0, %f8
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee at PLT
+; CHECK-NEXT:    sth %r2, 0(%r13)
+; CHECK-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; CHECK-NEXT:    lmg %r12, %r15, 264(%r15)
+; CHECK-NEXT:    br %r14
+entry:
+  %0 = load half, ptr %Op0, align 2
+  %ext = fpext half %0 to float
+  %1 = load half, ptr %Op1, align 2
+  %ext1 = fpext half %1 to float
+  %add = fadd float %ext, %ext1
+  %res = fptrunc float %add to half
+  store half %res, ptr %Dst, align 2
+  ret void
+}
+
+; Test case with a Half incoming argument.
+define zeroext i1 @fun2(half noundef %f) {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0: # %start
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    .cfi_offset %r14, -48
+; CHECK-NEXT:    .cfi_offset %r15, -40
+; CHECK-NEXT:    aghi %r15, -160
+; CHECK-NEXT:    .cfi_def_cfa_offset 320
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee at PLT
+; CHECK-NEXT:    brasl %r14, __gnu_h2f_ieee at PLT
+; CHECK-NEXT:    larl %r1, .LCPI2_0
+; CHECK-NEXT:    deb %f0, 0(%r1)
+; CHECK-NEXT:    brasl %r14, __gnu_f2h_ieee at PLT
+; CHECK-NEXT:    risbg %r2, %r2, 63, 191, 49
+; CHECK-NEXT:    lmg %r14, %r15, 272(%r15)
+; CHECK-NEXT:    br %r14
+start:
+  %self = fdiv half %f, 0xHC700
+  %_4 = bitcast half %self to i16
+  %_0 = icmp slt i16 %_4, 0
+  ret i1 %_0
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/109164