[clang] 8a3277a - [WebAssembly] Implement prototype f32.store_f16 instruction. (#91545)
via cfe-commits
cfe-commits at lists.llvm.org
Thu May 9 15:38:17 PDT 2024
Author: Brendan Dahl
Date: 2024-05-09T15:38:13-07:00
New Revision: 8a3277acbc7b7af917f570f7d6430dda41c4d9ee
URL: https://github.com/llvm/llvm-project/commit/8a3277acbc7b7af917f570f7d6430dda41c4d9ee
DIFF: https://github.com/llvm/llvm-project/commit/8a3277acbc7b7af917f570f7d6430dda41c4d9ee.diff
LOG: [WebAssembly] Implement prototype f32.store_f16 instruction. (#91545)
Adds a builtin and intrinsic for the f32.store_f16 instruction.
The instruction stores an f32 value as an f16 memory. Specified at:
https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md
Note: the current spec has f32.store_f16 as opcode 0xFD0121, but this is
incorrect and will be changed to 0xFC31 soon.
Added:
Modified:
clang/include/clang/Basic/BuiltinsWebAssembly.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins-wasm.c
llvm/include/llvm/IR/IntrinsicsWebAssembly.td
llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
llvm/test/CodeGen/WebAssembly/half-precision.ll
llvm/test/CodeGen/WebAssembly/offset.ll
llvm/test/MC/WebAssembly/simd-encodings.s
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index cf54f8f4422f8..8645cff1e8679 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -192,6 +192,7 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f"
// Half-Precision (fp16)
TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
+TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "half-precision")
// Reference Types builtins
// Some builtins are custom type-checked - see 't' as part of the third argument,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4b03b8b0e093e..f9ee93049b12d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -21310,6 +21310,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
return Builder.CreateCall(Callee, {Addr});
}
+ case WebAssembly::BI__builtin_wasm_storef16_f32: {
+ Value *Val = EmitScalarExpr(E->getArg(0));
+ Value *Addr = EmitScalarExpr(E->getArg(1));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
+ return Builder.CreateCall(Callee, {Val, Addr});
+ }
case WebAssembly::BI__builtin_wasm_table_get: {
assert(E->getArg(0)->getType()->isArrayType());
Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index ab1c6cd494ae5..bcb15969de1c5 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -807,6 +807,12 @@ float load_f16_f32(__fp16 *addr) {
// WEBASSEMBLY: call float @llvm.wasm.loadf16.f32(ptr %{{.*}})
}
+void store_f16_f32(float val, __fp16 *addr) {
+ return __builtin_wasm_storef16_f32(val, addr);
+ // WEBASSEMBLY: tail call void @llvm.wasm.storef16.f32(float %val, ptr %{{.*}})
+ // WEBASSEMBLY-NEXT: ret
+}
+
__externref_t externref_null() {
return __builtin_wasm_ref_null_extern();
// WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern()
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index f8142a8ca9e93..572d334ac9552 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -332,6 +332,11 @@ def int_wasm_loadf16_f32:
[llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly],
"", [SDNPMemOperand]>;
+def int_wasm_storef16_f32:
+ Intrinsic<[],
+ [llvm_float_ty, llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly],
+ "", [SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index d3b496ae59179..d4e9fb057c44d 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -207,6 +207,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
WASM_LOAD_STORE(LOAD_LANE_I16x8)
WASM_LOAD_STORE(STORE_LANE_I16x8)
WASM_LOAD_STORE(LOAD_F16_F32)
+ WASM_LOAD_STORE(STORE_F16_F32)
return 1;
WASM_LOAD_STORE(LOAD_I32)
WASM_LOAD_STORE(LOAD_F32)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index ed52fe53bc609..527bb4c9fbea6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -914,6 +914,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = Align(2);
Info.flags = MachineMemOperand::MOLoad;
return true;
+ case Intrinsic::wasm_storef16_f32:
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::f16;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.align = Align(2);
+ Info.flags = MachineMemOperand::MOStore;
+ return true;
default:
return false;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index e4baf842462a9..9d452879bbf80 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -72,8 +72,9 @@ defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33, []>;
defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>;
defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>;
-// Half Precision
-defm LOAD_F16_F32 : WebAssemblyLoad<F32, "f32.load_f16", 0xfc30, [HasHalfPrecision]>;
+// Half-precision load.
+defm LOAD_F16_F32 :
+ WebAssemblyLoad<F32, "f32.load_f16", 0xfc30, [HasHalfPrecision]>;
// Pattern matching
@@ -171,12 +172,18 @@ defm STORE8_I64 : WebAssemblyStore<I64, "i64.store8", 0x3c>;
defm STORE16_I64 : WebAssemblyStore<I64, "i64.store16", 0x3d>;
defm STORE32_I64 : WebAssemblyStore<I64, "i64.store32", 0x3e>;
+// Half-precision store.
+defm STORE_F16_F32 :
+ WebAssemblyStore<F32, "f32.store_f16", 0xfc31, [HasHalfPrecision]>;
+
defm : StorePat<i32, truncstorei8, "STORE8_I32">;
defm : StorePat<i32, truncstorei16, "STORE16_I32">;
defm : StorePat<i64, truncstorei8, "STORE8_I64">;
defm : StorePat<i64, truncstorei16, "STORE16_I64">;
defm : StorePat<i64, truncstorei32, "STORE32_I64">;
+defm : StorePat<f32, int_wasm_storef16_f32, "STORE_F16_F32">;
+
multiclass MemoryOps<WebAssemblyRegClass rc, string B> {
// Current memory size.
defm MEMORY_SIZE_A#B : I<(outs rc:$dst), (ins i32imm:$flags),
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 582771d3f95fc..89e9c42637c14 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision | FileCheck %s
declare float @llvm.wasm.loadf32.f16(ptr)
+declare void @llvm.wasm.storef16.f32(float, ptr)
; CHECK-LABEL: ldf16_32:
; CHECK: f32.load_f16 $push[[NUM0:[0-9]+]]=, 0($0){{$}}
@@ -10,3 +11,11 @@ define float @ldf16_32(ptr %p) {
%v = call float @llvm.wasm.loadf16.f32(ptr %p)
ret float %v
}
+
+; CHECK-LABEL: stf16_32:
+; CHECK: f32.store_f16 0($1), $0
+; CHECK-NEXT: return
+define void @stf16_32(float %v, ptr %p) {
+ tail call void @llvm.wasm.storef16.f32(float %v, ptr %p)
+ ret void
+}
diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll
index b497ddd7273a0..65de341780e31 100644
--- a/llvm/test/CodeGen/WebAssembly/offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset.ll
@@ -692,3 +692,30 @@ define float @load_f16_f32_with_folded_gep_offset(ptr %p) {
%t = call float @llvm.wasm.loadf16.f32(ptr %s)
ret float %t
}
+
+;===----------------------------------------------------------------------------
+; Stores: Half Precision
+;===----------------------------------------------------------------------------
+
+; Basic store.
+
+; CHECK-LABEL: store_f16_f32_no_offset:
+; CHECK-NEXT: .functype store_f16_f32_no_offset (i32, f32) -> (){{$}}
+; CHECK-NEXT: f32.store_f16 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @store_f16_f32_no_offset(ptr %p, float %v) {
+ call void @llvm.wasm.storef16.f32(float %v, ptr %p)
+ ret void
+}
+
+; Storing to a fixed address.
+
+; CHECK-LABEL: store_f16_f32_to_numeric_address:
+; CHECK: i32.const $push1=, 0{{$}}
+; CHECK-NEXT: f32.const $push0=, 0x0p0{{$}}
+; CHECK-NEXT: f32.store_f16 42($pop1), $pop0{{$}}
+define void @store_f16_f32_to_numeric_address() {
+ %s = inttoptr i32 42 to ptr
+ call void @llvm.wasm.storef16.f32(float 0.0, ptr %s)
+ ret void
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index e7c3761f381d0..57fa71e74b8d7 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -842,4 +842,7 @@ main:
# CHECK: f32.load_f16 48 # encoding: [0xfc,0x30,0x01,0x30]
f32.load_f16 48
+ # CHECK: f32.store_f16 32 # encoding: [0xfc,0x31,0x01,0x20]
+ f32.store_f16 32
+
end_function
More information about the cfe-commits
mailing list