[clang] [llvm] [WebAssembly] Implement prototype f32.store_f16 instruction. (PR #91545)

Thu May 9 09:16:02 PDT 2024

https://github.com/brendandahl updated https://github.com/llvm/llvm-project/pull/91545

>From adcb77e15d09f466f217d754f6f80aeb729aadc4 Mon Sep 17 00:00:00 2001
From: Brendan Dahl <brendan.dahl at gmail.com>
Date: Wed, 8 May 2024 23:10:07 +0000
Subject: [PATCH 1/3] [WebAssembly] Implement prototype f32.store_f16
 instruction.

Adds a builtin and intrinsic for the f32.store_f16 instruction.

The instruction stores an f32 value as an f16 memory.
Specified at:
https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md

Note: the current spec has f32.store_f16 as opcode 0xFD0121, but this is incorrect
and will be changed to 0xFC31 soon.
---
 .../clang/Basic/BuiltinsWebAssembly.def       |  1 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  6 +++++
 clang/test/CodeGen/builtins-wasm.c            |  6 +++++
 llvm/include/llvm/IR/IntrinsicsWebAssembly.td |  5 ++++
 .../MCTargetDesc/WebAssemblyMCTargetDesc.h    |  1 +
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  8 ++++++
 .../WebAssembly/WebAssemblyInstrMemory.td     |  4 +++
 .../CodeGen/WebAssembly/half-precision.ll     |  9 +++++++
 llvm/test/CodeGen/WebAssembly/offset.ll       | 27 +++++++++++++++++++
 llvm/test/MC/WebAssembly/simd-encodings.s     |  3 +++
 10 files changed, 70 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index cf54f8f4422f..41fadd10e943 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -192,6 +192,7 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f"
 
 // Half-Precision (fp16)
 TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
+TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "nU", "half-precision")
 
 // Reference Types builtins
 // Some builtins are custom type-checked - see 't' as part of the third argument,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e8a6bd050e17..abb644d8eb50 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -21308,6 +21308,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
     return Builder.CreateCall(Callee, {Addr});
   }
+  case WebAssembly::BI__builtin_wasm_storef16_f32: {
+    Value *Val = EmitScalarExpr(E->getArg(0));
+    Value *Addr = EmitScalarExpr(E->getArg(1));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
+    return Builder.CreateCall(Callee, {Val, Addr});
+  }
   case WebAssembly::BI__builtin_wasm_table_get: {
     assert(E->getArg(0)->getType()->isArrayType());
     Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index ab1c6cd494ae..bcb15969de1c 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -807,6 +807,12 @@ float load_f16_f32(__fp16 *addr) {
   // WEBASSEMBLY: call float @llvm.wasm.loadf16.f32(ptr %{{.*}})
 }
 
+void store_f16_f32(float val, __fp16 *addr) {
+  return __builtin_wasm_storef16_f32(val, addr);
+  // WEBASSEMBLY: tail call void @llvm.wasm.storef16.f32(float %val, ptr %{{.*}})
+  // WEBASSEMBLY-NEXT: ret
+}
+
 __externref_t externref_null() {
   return __builtin_wasm_ref_null_extern();
   // WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern()
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index f8142a8ca9e9..572d334ac955 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -332,6 +332,11 @@ def int_wasm_loadf16_f32:
             [llvm_ptr_ty],
             [IntrReadMem, IntrArgMemOnly],
              "", [SDNPMemOperand]>;
+def int_wasm_storef16_f32:
+  Intrinsic<[],
+            [llvm_float_ty, llvm_ptr_ty],
+            [IntrWriteMem, IntrArgMemOnly],
+             "", [SDNPMemOperand]>;
 
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index d3b496ae5917..d4e9fb057c44 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -207,6 +207,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
   WASM_LOAD_STORE(LOAD_LANE_I16x8)
   WASM_LOAD_STORE(STORE_LANE_I16x8)
   WASM_LOAD_STORE(LOAD_F16_F32)
+  WASM_LOAD_STORE(STORE_F16_F32)
   return 1;
   WASM_LOAD_STORE(LOAD_I32)
   WASM_LOAD_STORE(LOAD_F32)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index ed52fe53bc60..527bb4c9fbea 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -914,6 +914,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.align = Align(2);
     Info.flags = MachineMemOperand::MOLoad;
     return true;
+  case Intrinsic::wasm_storef16_f32:
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = MVT::f16;
+    Info.ptrVal = I.getArgOperand(1);
+    Info.offset = 0;
+    Info.align = Align(2);
+    Info.flags = MachineMemOperand::MOStore;
+    return true;
   default:
     return false;
   }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index e4baf842462a..c6a379f51247 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -171,12 +171,16 @@ defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>;
 defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
 defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
 
+defm STORE_F16_F32 : WebAssemblyStore<F32, "f32.store_f16", 0xfc31, [HasHalfPrecision]>;
+
 defm : StorePat<i32, truncstorei8, "STORE8_I32">;
 defm : StorePat<i32, truncstorei16, "STORE16_I32">;
 defm : StorePat<i64, truncstorei8, "STORE8_I64">;
 defm : StorePat<i64, truncstorei16, "STORE16_I64">;
 defm : StorePat<i64, truncstorei32, "STORE32_I64">;
 
+defm : StorePat<f32, int_wasm_storef16_f32, "STORE_F16_F32">;
+
 multiclass MemoryOps<WebAssemblyRegClass rc, string B> {
 // Current memory size.
 defm MEMORY_SIZE_A#B : I<(outs rc:$dst), (ins i32imm:$flags),
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 582771d3f95f..89e9c42637c1 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -2,6 +2,7 @@
 ; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision | FileCheck %s
 
 declare float @llvm.wasm.loadf32.f16(ptr)
+declare void @llvm.wasm.storef16.f32(float, ptr)
 
 ; CHECK-LABEL: ldf16_32:
 ; CHECK:      f32.load_f16 $push[[NUM0:[0-9]+]]=, 0($0){{$}}
@@ -10,3 +11,11 @@ define float @ldf16_32(ptr %p) {
   %v = call float @llvm.wasm.loadf16.f32(ptr %p)
   ret float %v
 }
+
+; CHECK-LABEL: stf16_32:
+; CHECK:       f32.store_f16 0($1), $0
+; CHECK-NEXT:  return
+define void @stf16_32(float %v, ptr %p) {
+  tail call void @llvm.wasm.storef16.f32(float %v, ptr %p)
+  ret void
+}
diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll
index b497ddd7273a..65de341780e3 100644
--- a/llvm/test/CodeGen/WebAssembly/offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset.ll
@@ -692,3 +692,30 @@ define float @load_f16_f32_with_folded_gep_offset(ptr %p) {
   %t = call float @llvm.wasm.loadf16.f32(ptr %s)
   ret float %t
 }
+
+;===----------------------------------------------------------------------------
+; Stores: Half Precision
+;===----------------------------------------------------------------------------
+
+; Basic store.
+
+; CHECK-LABEL: store_f16_f32_no_offset:
+; CHECK-NEXT: .functype store_f16_f32_no_offset (i32, f32) -> (){{$}}
+; CHECK-NEXT: f32.store_f16 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @store_f16_f32_no_offset(ptr %p, float %v) {
+  call void @llvm.wasm.storef16.f32(float %v, ptr %p)
+  ret void
+}
+
+; Storing to a fixed address.
+
+; CHECK-LABEL: store_f16_f32_to_numeric_address:
+; CHECK:      i32.const $push1=, 0{{$}}
+; CHECK-NEXT: f32.const $push0=, 0x0p0{{$}}
+; CHECK-NEXT: f32.store_f16 42($pop1), $pop0{{$}}
+define void @store_f16_f32_to_numeric_address() {
+  %s = inttoptr i32 42 to ptr
+  call void @llvm.wasm.storef16.f32(float 0.0, ptr %s)
+  ret void
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index e7c3761f381d..57fa71e74b8d 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -842,4 +842,7 @@ main:
     # CHECK: f32.load_f16 48 # encoding: [0xfc,0x30,0x01,0x30]
     f32.load_f16 48
 
+    # CHECK: f32.store_f16 32 # encoding: [0xfc,0x31,0x01,0x20]
+    f32.store_f16 32
+
     end_function

>From 9b8832b421b8f1e9ca2f6a37a73bbc78b0a2331f Mon Sep 17 00:00:00 2001
From: Brendan Dahl <brendan.dahl at gmail.com>
Date: Thu, 9 May 2024 09:13:39 -0700
Subject: [PATCH 2/3] Update
 llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td

Co-authored-by: Heejin Ahn <aheejin at gmail.com>
---
 llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index c6a379f51247..8d106686a174 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -171,7 +171,9 @@ defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>;
 defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
 defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
 
-defm STORE_F16_F32 : WebAssemblyStore<F32, "f32.store_f16", 0xfc31, [HasHalfPrecision]>;
+// Half-precision store.
+defm STORE_F16_F32 :
+  WebAssemblyStore<F32, "f32.store_f16", 0xfc31, [HasHalfPrecision]>;
 
 defm : StorePat<i32, truncstorei8, "STORE8_I32">;
 defm : StorePat<i32, truncstorei16, "STORE16_I32">;

>From 2ff7c80a74d0f692468d0adf532cd4a865fdced7 Mon Sep 17 00:00:00 2001
From: Brendan Dahl <brendan.dahl at gmail.com>
Date: Thu, 9 May 2024 16:15:48 +0000
Subject: [PATCH 3/3] Review comment.

---
 llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 8d106686a174..68b37bb09dd5 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -181,6 +181,7 @@ defm : StorePat<i64, truncstorei8, "STORE8_I64">;
 defm : StorePat<i64, truncstorei16, "STORE16_I64">;
 defm : StorePat<i64, truncstorei32, "STORE32_I64">;
 
+// Half-precision store.
 defm : StorePat<f32, int_wasm_storef16_f32, "STORE_F16_F32">;
 
 multiclass MemoryOps<WebAssemblyRegClass rc, string B> {