[clang] [llvm] [WebAssembly] Implement f16x8.replace_lane instruction. (PR #99388)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 17 14:14:08 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-webassembly
@llvm/pr-subscribers-mc
Author: Brendan Dahl (brendandahl)
<details>
<summary>Changes</summary>
Use a builtin and intrinsic until half types are better supported for instruction selection.
---
Full diff: https://github.com/llvm/llvm-project/pull/99388.diff
7 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsWebAssembly.def (+1)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+7)
- (modified) clang/test/CodeGen/builtins-wasm.c (+6)
- (modified) llvm/include/llvm/IR/IntrinsicsWebAssembly.td (+4)
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td (+13)
- (modified) llvm/test/CodeGen/WebAssembly/half-precision.ll (+8)
- (modified) llvm/test/MC/WebAssembly/simd-encodings.s (+3)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 2a45f8a6582a2..df304a71e475e 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -201,6 +201,7 @@ TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision")
TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "half-precision")
TARGET_BUILTIN(__builtin_wasm_splat_f16x8, "V8hf", "nc", "half-precision")
TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hi", "nc", "half-precision")
+TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hif", "nc", "half-precision")
// Reference Types builtins
// Some builtins are custom type-checked - see 't' as part of the third argument,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 67027f8aa93f3..402b7a7b20e61 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -21386,6 +21386,13 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
return Builder.CreateCall(Callee, {Vector, Index});
}
+ case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
+ Value *Vector = EmitScalarExpr(E->getArg(0));
+ Value *Index = EmitScalarExpr(E->getArg(1));
+ Value *Val = EmitScalarExpr(E->getArg(2));
+ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
+ return Builder.CreateCall(Callee, {Vector, Index, Val});
+ }
case WebAssembly::BI__builtin_wasm_table_get: {
assert(E->getArg(0)->getType()->isArrayType());
Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 75861b1b4bd6d..f494aeada0157 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -840,6 +840,12 @@ float extract_lane_f16x8(f16x8 a, int i) {
return __builtin_wasm_extract_lane_f16x8(a, i);
}
+f16x8 replace_lane_f16x8(f16x8 a, int i, float v) {
+ // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 %i, float %v)
+ // WEBASSEMBLY-NEXT: ret <8 x half> %0
+ return __builtin_wasm_replace_lane_f16x8(a, i, v);
+}
+
f16x8 min_f16x8(f16x8 a, f16x8 b) {
// WEBASSEMBLY: %0 = tail call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b)
// WEBASSEMBLY-NEXT: ret <8 x half> %0
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 47aab196a6d4f..4d2df1c44ebce 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -363,6 +363,10 @@ def int_wasm_extract_lane_f16x8:
DefaultAttrsIntrinsic<[llvm_float_ty],
[llvm_v8f16_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_replace_lane_f16x8:
+ DefaultAttrsIntrinsic<[llvm_v8f16_ty],
+ [llvm_v8f16_ty, llvm_i32_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 2ee430c88169d..f11fe12c6ecb8 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -702,6 +702,19 @@ defm "" : ReplaceLane<I64x2, 30>;
defm "" : ReplaceLane<F32x4, 32>;
defm "" : ReplaceLane<F64x2, 34>;
+// For now use an intrinsic for f16x8.replace_lane instead of ReplaceLane above
+// since LL generated with half type arguments is not well supported and creates
+// conversions from f16->f32.
+defm REPLACE_LANE_F16x8 :
+ HALF_PRECISION_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, F32:$x),
+ (outs), (ins vec_i8imm_op:$idx),
+ [(set (v8f16 V128:$dst), (int_wasm_replace_lane_f16x8
+ (v8f16 V128:$vec),
+ (i32 LaneIdx8:$idx),
+ (f32 F32:$x)))],
+ "f16x8.replace_lane\t$dst, $vec, $idx, $x",
+ "f16x8.replace_lane\t$idx", 0x122>;
+
// Lower undef lane indices to zero
def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
(REPLACE_LANE_I8x16 $vec, 0, $x)>;
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index fa78f5f9591d6..dba4138ad59cc 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -36,6 +36,14 @@ define float @extract_lane_v8f16(<8 x half> %v) {
ret float %r
}
+; CHECK-LABEL: replace_lane_v8f16:
+; CHECK: f16x8.replace_lane $push0=, $0, 1, $1
+; CHECK-NEXT: return $pop0
+define <8 x half> @replace_lane_v8f16(<8 x half> %v, float %f) {
+ %r = call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %v, i32 1, float %f)
+ ret <8 x half> %r
+}
+
; CHECK-LABEL: add_v8f16:
; CHECK: f16x8.add $push0=, $0, $1
; CHECK-NEXT: return $pop0
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 8c3483bfaad7a..7ae4d47d888cf 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -851,6 +851,9 @@ main:
# CHECK: f16x8.extract_lane 1 # encoding: [0xfd,0xa1,0x02,0x01]
f16x8.extract_lane 1
+ # CHECK: f16x8.replace_lane 1 # encoding: [0xfd,0xa2,0x02,0x01]
+ f16x8.replace_lane 1
+
# CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02]
f16x8.add
``````````
</details>
https://github.com/llvm/llvm-project/pull/99388
More information about the cfe-commits
mailing list