[clang] [X86][CIR] Implement handling for F16 halves to float conversion builtins (PR #173572)
Priyanshu Kumar via cfe-commits
cfe-commits at lists.llvm.org
Sat Dec 27 06:36:24 PST 2025
https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/173572
>From 21e58286204a47fb72470e7a1598e1d649ddfcba Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 14:21:53 +0000
Subject: [PATCH 01/16] Implement handling for F16 halves to floats conversion
builtins
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 56 +++++-
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 185 ++++++++++++++++++
2 files changed, 240 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..07f915b51ad6d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,13 +14,20 @@
#include "CIRGenBuilder.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
+#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
+#include "mlir/IR/Value.h"
#include "mlir/IR/ValueRange.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
using namespace clang;
using namespace clang::CIRGen;
@@ -362,6 +369,45 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
return builder.createMul(loc, lhs, rhs);
}
+// Convert F16 halfs to floats.
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ const StringRef str,
+ llvm::ArrayRef<mlir::Value> ops,
+ mlir::Type dstTy) {
+ assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
+ "Unknown cvtph2ps intrinsic");
+
+ // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
+ if (ops.size() == 4 &&
+ ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() !=
+ 4) {
+ return emitIntrinsicCallOp(builder, loc, str, dstTy, ops);
+ }
+
+ unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
+ mlir::Value src = ops[0];
+
+ // Extract the subvector
+ if (numElts != cast<cir::VectorType>(src.getType()).getSize()) {
+ assert(numElts == 4 && "Unexpected vector size");
+ src = builder.createVecShuffle(loc, src, {0, 1, 2, 3});
+ }
+
+ // Bitcast from vXi16 to vXf16.
+ cir::VectorType halfTy = cir::VectorType::get(
+ cir::FP16Type::get(builder.getContext()), numElts);
+
+ src = builder.createCast(cir::CastKind::bitcast, src, halfTy);
+
+ // Perform the fp-extension
+ mlir::Value res = builder.createCast(cir::CastKind::floating, src, dstTy);
+
+ if (ops.size() >= 3)
+ res = emitX86Select(builder, loc, ops[2], res, ops[1]);
+ return res;
+}
+
static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
llvm::SmallVector<mlir::Value> ops,
bool isSigned) {
@@ -1662,9 +1708,17 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_cmpnltsd:
case X86::BI__builtin_ia32_cmpnlesd:
case X86::BI__builtin_ia32_cmpordsd:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI__builtin_ia32_vcvtph2ps_mask:
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ return emitX86CvtF16ToFloatExpr(builder, loc, "cvtph2ps", ops,
+ convertType(expr->getType()));
+ }
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
new file mode 100644
index 0000000000000..ee42f5de48d98
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -0,0 +1,185 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
+ // CIR-LABEL: test_vcvtph2ps_mask
+ // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps_mask
+ // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+ // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+ // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // LLVM: ret <4 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps_mask
+ // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+ // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+ // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // OGCG: ret <4 x float> {{.*}}
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
+}
+
+__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
+ // CIR-LABEL: test_vcvtph2ps256_mask
+ // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps256_mask
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+ // LLVM: ret <8 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps256_mask
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // OGCG: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+ // OGCG: ret <8 x float> {{.*}}
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
+}
+
+__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
+ // CIR-LABEL: test_vcvtph2ps512_mask
+ // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool>
+ // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps512_mask
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+ // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+ // LLVM: ret <16 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps512_mask
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+ // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+ // OGCG: ret <16 x float> {{.*}}
+ typedef short __v16hi __attribute__((__vector_size__(32)));
+ return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
+}
+
+__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
+ // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
+ // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
+
+ // LLVM-LABEL: @test_vcvtph2ps_maskz
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+ // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+ // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+ // LLVM: ret <4 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps_maskz
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+ // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+ // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+ // OGCG: ret <4 x float> {{.*}}
+
+ return _mm_maskz_cvtph_ps(k, a);
+}
+
+__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+ // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}})
+
+
+ // LLVM-LABEL: @test_vcvtph2ps256_maskz
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+ // LLVM: ret <8 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps256_maskz
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+ // OGCG: ret <8 x float> {{.*}}
+ return _mm256_maskz_cvtph_ps(k, a);
+}
+
+__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+ // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
+
+ // LLVM-LABEL: @test_vcvtph2ps512_maskz
+ // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+ // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // LLVM: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+ // LLVM: ret <16 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps512_maskz
+ // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+ // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+ // OGCG: ret <16 x float> {{.*}}
+ return _mm512_maskz_cvtph_ps(k, a);
+}
>From b73200cc338b40a38999ccbdeb174e45c9e9fff2 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 14:30:42 +0000
Subject: [PATCH 02/16] Remove unwanted headers included by clangd
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 7 -------
1 file changed, 7 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 07f915b51ad6d..9ecec9d615bc4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,20 +14,13 @@
#include "CIRGenBuilder.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
-#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Location.h"
-#include "mlir/IR/Types.h"
-#include "mlir/IR/Value.h"
#include "mlir/IR/ValueRange.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetBuiltins.h"
-#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
using namespace clang;
using namespace clang::CIRGen;
>From a72461410892a3561c0cede6cd564a266f507eb4 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 14:33:48 +0000
Subject: [PATCH 03/16] Fix formatting
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 9ecec9d615bc4..b39a4e683385d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -388,8 +388,8 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
}
// Bitcast from vXi16 to vXf16.
- cir::VectorType halfTy = cir::VectorType::get(
- cir::FP16Type::get(builder.getContext()), numElts);
+ cir::VectorType halfTy =
+ cir::VectorType::get(cir::FP16Type::get(builder.getContext()), numElts);
src = builder.createCast(cir::CastKind::bitcast, src, halfTy);
>From b29a415f3dbfb072c3b29e0c219f9dd1f3e3dbec Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 14:34:03 +0000
Subject: [PATCH 04/16] Fix formatting
---
.../X86/avx512f16c-builtins.cir | 393 ++++++++++++++++++
1 file changed, 393 insertions(+)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
new file mode 100644
index 0000000000000..9364d531b3585
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
@@ -0,0 +1,393 @@
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!u16i = !cir.int<u, 16>
+!u8i = !cir.int<u, 8>
+#loc3 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:28)
+#loc4 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:36)
+#loc5 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:39)
+#loc6 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:46)
+#loc7 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:51)
+#loc8 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:60)
+#loc18 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:31)
+#loc19 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:39)
+#loc20 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:42)
+#loc21 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:49)
+#loc22 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:54)
+#loc23 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:63)
+#loc33 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:31)
+#loc34 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:39)
+#loc35 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:42)
+#loc36 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:49)
+#loc37 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:54)
+#loc38 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:64)
+#loc56 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:21)
+#loc57 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:30)
+#loc58 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:35)
+#loc59 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:43)
+#loc69 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:29)
+#loc70 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:37)
+#loc71 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:40)
+#loc72 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:49)
+#loc88 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:24)
+#loc89 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:33)
+#loc90 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:38)
+#loc91 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:46)
+#loc101 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:32)
+#loc102 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:40)
+#loc103 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:43)
+#loc104 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:52)
+#loc120 = loc("./lib/clang/22/include/avx512fintrin.h":3584:24)
+#loc121 = loc("./lib/clang/22/include/avx512fintrin.h":3584:34)
+#loc122 = loc("./lib/clang/22/include/avx512fintrin.h":3584:39)
+#loc123 = loc("./lib/clang/22/include/avx512fintrin.h":3584:47)
+#loc134 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:32)
+#loc135 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:40)
+#loc136 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:43)
+#loc137 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:53)
+#loc145 = loc(fused[#loc3, #loc4])
+#loc146 = loc(fused[#loc5, #loc6])
+#loc147 = loc(fused[#loc7, #loc8])
+#loc150 = loc(fused[#loc18, #loc19])
+#loc151 = loc(fused[#loc20, #loc21])
+#loc152 = loc(fused[#loc22, #loc23])
+#loc155 = loc(fused[#loc33, #loc34])
+#loc156 = loc(fused[#loc35, #loc36])
+#loc157 = loc(fused[#loc37, #loc38])
+#loc164 = loc(fused[#loc56, #loc57])
+#loc165 = loc(fused[#loc58, #loc59])
+#loc168 = loc(fused[#loc69, #loc70])
+#loc169 = loc(fused[#loc71, #loc72])
+#loc176 = loc(fused[#loc88, #loc89])
+#loc177 = loc(fused[#loc90, #loc91])
+#loc180 = loc(fused[#loc101, #loc102])
+#loc181 = loc(fused[#loc103, #loc104])
+#loc188 = loc(fused[#loc120, #loc121])
+#loc189 = loc(fused[#loc122, #loc123])
+#loc192 = loc(fused[#loc134, #loc135])
+#loc193 = loc(fused[#loc136, #loc137])
+module @"/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c" attributes {cir.lang = #cir.lang<c>, cir.module_asm = [], cir.triple = "x86_64-unknown-linux", dlti.dl_spec = #dlti.dl_spec<!llvm.ptr<270> = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array<i32: 8, 16, 32, 64>, "dlti.stack_alignment" = 128 : i64>} {
+ cir.func no_inline dso_local @test_vcvtph2ps_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc3, #loc4]), %arg1: !cir.vector<4 x !cir.float> loc(fused[#loc5, #loc6]), %arg2: !u8i loc(fused[#loc7, #loc8])) -> !cir.vector<4 x !cir.float> {
+ %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc145)
+ %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["src", init] {alignment = 16 : i64} loc(#loc146)
+ %2 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc147)
+ %3 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc2)
+ cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc9)
+ cir.store %arg1, %1 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc9)
+ cir.store %arg2, %2 : !u8i, !cir.ptr<!u8i> loc(#loc9)
+ %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc10)
+ %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc10)
+ %6 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc11)
+ %7 = cir.load align(1) %2 : !cir.ptr<!u8i>, !u8i loc(#loc12)
+ %8 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc13)
+ %9 = cir.vec.shuffle(%5, %8 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc13)
+ %10 = cir.cast bitcast %9 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc13)
+ %11 = cir.cast floating %10 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc13)
+ %12 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc12)
+ %13 = cir.vec.shuffle(%12, %12 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc13)
+ %14 = cir.select if %13 then %11 else %6 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc13)
+ cir.store %14, %3 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc148)
+ %15 = cir.load %3 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc148)
+ cir.return %15 : !cir.vector<4 x !cir.float> loc(#loc148)
+ } loc(#loc144)
+ cir.func no_inline dso_local @test_vcvtph2ps256_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc18, #loc19]), %arg1: !cir.vector<8 x !cir.float> loc(fused[#loc20, #loc21]), %arg2: !u8i loc(fused[#loc22, #loc23])) -> !cir.vector<8 x !cir.float> {
+ %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc150)
+ %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["src", init] {alignment = 32 : i64} loc(#loc151)
+ %2 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc152)
+ %3 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc17)
+ cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc24)
+ cir.store %arg1, %1 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc24)
+ cir.store %arg2, %2 : !u8i, !cir.ptr<!u8i> loc(#loc24)
+ %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc25)
+ %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc25)
+ %6 = cir.load align(32) %1 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc26)
+ %7 = cir.load align(1) %2 : !cir.ptr<!u8i>, !u8i loc(#loc27)
+ %8 = cir.cast bitcast %5 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc25)
+ %9 = cir.cast floating %8 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc25)
+ %10 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc27)
+ %11 = cir.select if %10 then %9 else %6 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc28)
+ cir.store %11, %3 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc153)
+ %12 = cir.load %3 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc153)
+ cir.return %12 : !cir.vector<8 x !cir.float> loc(#loc153)
+ } loc(#loc149)
+ cir.func no_inline dso_local @test_vcvtph2ps512_mask(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc33, #loc34]), %arg1: !cir.vector<16 x !cir.float> loc(fused[#loc35, #loc36]), %arg2: !u16i loc(fused[#loc37, #loc38])) -> !cir.vector<16 x !cir.float> {
+ %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["a", init] {alignment = 32 : i64} loc(#loc155)
+ %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["src", init] {alignment = 64 : i64} loc(#loc156)
+ %2 = cir.alloca !u16i, !cir.ptr<!u16i>, ["k", init] {alignment = 2 : i64} loc(#loc157)
+ %3 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc32)
+ cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc39)
+ cir.store %arg1, %1 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc39)
+ cir.store %arg2, %2 : !u16i, !cir.ptr<!u16i> loc(#loc39)
+ %4 = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc40)
+ %5 = cir.cast bitcast %4 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc40)
+ %6 = cir.load align(64) %1 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc41)
+ %7 = cir.load align(2) %2 : !cir.ptr<!u16i>, !u16i loc(#loc42)
+ %8 = cir.const #cir.int<4> : !s32i loc(#loc43)
+ %9 = cir.cast bitcast %5 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc40)
+ %10 = cir.cast floating %9 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc40)
+ %11 = cir.cast bitcast %7 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc42)
+ %12 = cir.select if %11 then %10 else %6 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc44)
+ cir.store %12, %3 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc158)
+ %13 = cir.load %3 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc158)
+ cir.return %13 : !cir.vector<16 x !cir.float> loc(#loc158)
+ } loc(#loc154)
+ cir.func always_inline internal private dso_local @_mm_setzero_ps() -> !cir.vector<4 x !cir.float> {
+ %0 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc48)
+ %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, [".compoundliteral"] {alignment = 16 : i64} loc(#loc160)
+ %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<4 x !cir.float> loc(#loc161)
+ cir.store align(16) %2, %1 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc52)
+ %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc49)
+ cir.store %3, %0 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc162)
+ %4 = cir.load %0 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc162)
+ cir.return %4 : !cir.vector<4 x !cir.float> loc(#loc162)
+ } loc(#loc159)
+ cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc56, #loc57]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc58, #loc59])) -> !cir.vector<4 x !cir.float> {
+ %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc164)
+ %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__A", init] {alignment = 16 : i64} loc(#loc165)
+ %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc55)
+ cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc60)
+ cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc60)
+ %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc61)
+ %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc61)
+ %5 = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> loc(#loc62)
+ %6 = cir.load align(1) %0 : !cir.ptr<!u8i>, !u8i loc(#loc63)
+ %7 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc64)
+ %8 = cir.vec.shuffle(%4, %7 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc64)
+ %9 = cir.cast bitcast %8 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc64)
+ %10 = cir.cast floating %9 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc64)
+ %11 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc63)
+ %12 = cir.vec.shuffle(%11, %11 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc64)
+ %13 = cir.select if %12 then %10 else %5 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc64)
+ cir.store %13, %2 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc166)
+ %14 = cir.load %2 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc166)
+ cir.return %14 : !cir.vector<4 x !cir.float> loc(#loc166)
+ } loc(#loc163)
+ cir.func no_inline dso_local @test_vcvtph2ps_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc69, #loc70]), %arg1: !u8i loc(fused[#loc71, #loc72])) -> !cir.vector<4 x !cir.float> {
+ %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc168)
+ %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc169)
+ %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc68)
+ cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc73)
+ cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc73)
+ %3 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc74)
+ %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc75)
+ %5 = cir.call @_mm_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<4 x !cir.float> loc(#loc76)
+ cir.store %5, %2 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc170)
+ %6 = cir.load %2 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc170)
+ cir.return %6 : !cir.vector<4 x !cir.float> loc(#loc170)
+ } loc(#loc167)
+ cir.func always_inline internal private dso_local @_mm256_setzero_ps() -> !cir.vector<8 x !cir.float> {
+ %0 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc80)
+ %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, [".compoundliteral"] {alignment = 32 : i64} loc(#loc172)
+ %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<8 x !cir.float> loc(#loc173)
+ cir.store align(32) %2, %1 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc84)
+ %3 = cir.load align(32) %1 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc81)
+ cir.store %3, %0 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc174)
+ %4 = cir.load %0 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc174)
+ cir.return %4 : !cir.vector<8 x !cir.float> loc(#loc174)
+ } loc(#loc171)
+ cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc88, #loc89]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc90, #loc91])) -> !cir.vector<8 x !cir.float> {
+ %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc176)
+ %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__A", init] {alignment = 16 : i64} loc(#loc177)
+ %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc87)
+ cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc92)
+ cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc92)
+ %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc93)
+ %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc93)
+ %5 = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float> loc(#loc94)
+ %6 = cir.load align(1) %0 : !cir.ptr<!u8i>, !u8i loc(#loc95)
+ %7 = cir.cast bitcast %4 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc93)
+ %8 = cir.cast floating %7 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc93)
+ %9 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc95)
+ %10 = cir.select if %9 then %8 else %5 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc96)
+ cir.store %10, %2 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc178)
+ %11 = cir.load %2 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc178)
+ cir.return %11 : !cir.vector<8 x !cir.float> loc(#loc178)
+ } loc(#loc175)
+ cir.func no_inline dso_local @test_vcvtph2ps256_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc101, #loc102]), %arg1: !u8i loc(fused[#loc103, #loc104])) -> !cir.vector<8 x !cir.float> {
+ %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc180)
+ %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc181)
+ %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc100)
+ cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc105)
+ cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc105)
+ %3 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc106)
+ %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc107)
+ %5 = cir.call @_mm256_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<8 x !cir.float> loc(#loc108)
+ cir.store %5, %2 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc182)
+ %6 = cir.load %2 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc182)
+ cir.return %6 : !cir.vector<8 x !cir.float> loc(#loc182)
+ } loc(#loc179)
+ cir.func always_inline internal private dso_local @_mm512_setzero_ps() -> !cir.vector<16 x !cir.float> {
+ %0 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc112)
+ %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, [".compoundliteral"] {alignment = 64 : i64} loc(#loc184)
+ %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<16 x !cir.float> loc(#loc185)
+ cir.store align(64) %2, %1 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc116)
+ %3 = cir.load align(64) %1 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc113)
+ cir.store %3, %0 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc186)
+ %4 = cir.load %0 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc186)
+ cir.return %4 : !cir.vector<16 x !cir.float> loc(#loc186)
+ } loc(#loc183)
+ cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps(%arg0: !u16i loc(fused[#loc120, #loc121]), %arg1: !cir.vector<4 x !s64i> loc(fused[#loc122, #loc123])) -> !cir.vector<16 x !cir.float> {
+ %0 = cir.alloca !u16i, !cir.ptr<!u16i>, ["__U", init] {alignment = 2 : i64} loc(#loc188)
+ %1 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["__A", init] {alignment = 32 : i64} loc(#loc189)
+ %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc119)
+ cir.store %arg0, %0 : !u16i, !cir.ptr<!u16i> loc(#loc124)
+ cir.store %arg1, %1 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc124)
+ %3 = cir.load align(32) %1 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc125)
+ %4 = cir.cast bitcast %3 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc125)
+ %5 = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float> loc(#loc126)
+ %6 = cir.load align(2) %0 : !cir.ptr<!u16i>, !u16i loc(#loc127)
+ %7 = cir.const #cir.int<4> : !s32i loc(#loc128)
+ %8 = cir.cast bitcast %4 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc125)
+ %9 = cir.cast floating %8 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc125)
+ %10 = cir.cast bitcast %6 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc127)
+ %11 = cir.select if %10 then %9 else %5 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc129)
+ cir.store %11, %2 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc190)
+ %12 = cir.load %2 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc190)
+ cir.return %12 : !cir.vector<16 x !cir.float> loc(#loc190)
+ } loc(#loc187)
+ cir.func no_inline dso_local @test_vcvtph2ps512_maskz(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc134, #loc135]), %arg1: !u16i loc(fused[#loc136, #loc137])) -> !cir.vector<16 x !cir.float> {
+ %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["a", init] {alignment = 32 : i64} loc(#loc192)
+ %1 = cir.alloca !u16i, !cir.ptr<!u16i>, ["k", init] {alignment = 2 : i64} loc(#loc193)
+ %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc133)
+ cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc138)
+ cir.store %arg1, %1 : !u16i, !cir.ptr<!u16i> loc(#loc138)
+ %3 = cir.load align(2) %1 : !cir.ptr<!u16i>, !u16i loc(#loc139)
+ %4 = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc140)
+ %5 = cir.call @_mm512_maskz_cvtph_ps(%3, %4) : (!u16i, !cir.vector<4 x !s64i>) -> !cir.vector<16 x !cir.float> loc(#loc141)
+ cir.store %5, %2 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc194)
+ %6 = cir.load %2 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc194)
+ cir.return %6 : !cir.vector<16 x !cir.float> loc(#loc194)
+ } loc(#loc191)
+} loc(#loc)
+#loc = loc("/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":0:0)
+#loc1 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:1)
+#loc2 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":36:1)
+#loc9 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:63)
+#loc10 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:48)
+#loc11 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:51)
+#loc12 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:56)
+#loc13 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:10)
+#loc14 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:3)
+#loc15 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:57)
+#loc16 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:1)
+#loc17 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":62:1)
+#loc24 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:66)
+#loc25 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:51)
+#loc26 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:54)
+#loc27 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:59)
+#loc28 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:10)
+#loc29 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:3)
+#loc30 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:60)
+#loc31 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:1)
+#loc32 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":89:1)
+#loc39 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:67)
+#loc40 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:52)
+#loc41 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:55)
+#loc42 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:60)
+#loc43 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:63)
+#loc44 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:10)
+#loc45 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:3)
+#loc46 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:64)
+#loc47 = loc("./lib/clang/22/include/xmmintrin.h":2017:1)
+#loc48 = loc("./lib/clang/22/include/xmmintrin.h":2020:1)
+#loc49 = loc("./lib/clang/22/include/xmmintrin.h":2019:24)
+#loc50 = loc("./lib/clang/22/include/xmmintrin.h":2019:57)
+#loc51 = loc("./lib/clang/22/include/xmmintrin.h":2019:32)
+#loc52 = loc("./lib/clang/22/include/xmmintrin.h":2018:1)
+#loc53 = loc("./lib/clang/22/include/xmmintrin.h":2019:3)
+#loc54 = loc("./lib/clang/22/include/avx512vlintrin.h":8026:1)
+#loc55 = loc("./lib/clang/22/include/avx512vlintrin.h":8033:1)
+#loc60 = loc("./lib/clang/22/include/avx512vlintrin.h":8028:1)
+#loc61 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:59)
+#loc62 = loc("./lib/clang/22/include/avx512vlintrin.h":8031:14)
+#loc63 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:25)
+#loc64 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:19)
+#loc65 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:3)
+#loc66 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:28)
+#loc67 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:1)
+#loc68 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":126:1)
+#loc73 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:52)
+#loc74 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:29)
+#loc75 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:32)
+#loc76 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:10)
+#loc77 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:3)
+#loc78 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:33)
+#loc79 = loc("./lib/clang/22/include/avxintrin.h":4291:1)
+#loc80 = loc("./lib/clang/22/include/avxintrin.h":4293:1)
+#loc81 = loc("./lib/clang/22/include/avxintrin.h":4292:24)
+#loc82 = loc("./lib/clang/22/include/avxintrin.h":4292:81)
+#loc83 = loc("./lib/clang/22/include/avxintrin.h":4292:32)
+#loc84 = loc("./lib/clang/22/include/avxintrin.h":4291:53)
+#loc85 = loc("./lib/clang/22/include/avxintrin.h":4292:3)
+#loc86 = loc("./lib/clang/22/include/avx512vlintrin.h":8043:1)
+#loc87 = loc("./lib/clang/22/include/avx512vlintrin.h":8050:1)
+#loc92 = loc("./lib/clang/22/include/avx512vlintrin.h":8045:1)
+#loc93 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:62)
+#loc94 = loc("./lib/clang/22/include/avx512vlintrin.h":8048:17)
+#loc95 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:28)
+#loc96 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:19)
+#loc97 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:3)
+#loc98 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:31)
+#loc99 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:1)
+#loc100 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":156:1)
+#loc105 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:55)
+#loc106 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:33)
+#loc107 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:36)
+#loc108 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:11)
+#loc109 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:4)
+#loc110 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:37)
+#loc111 = loc("./lib/clang/22/include/avx512fintrin.h":259:1)
+#loc112 = loc("./lib/clang/22/include/avx512fintrin.h":262:1)
+#loc113 = loc("./lib/clang/22/include/avx512fintrin.h":260:23)
+#loc114 = loc("./lib/clang/22/include/avx512fintrin.h":261:78)
+#loc115 = loc("./lib/clang/22/include/avx512fintrin.h":260:31)
+#loc116 = loc("./lib/clang/22/include/avx512fintrin.h":259:56)
+#loc117 = loc("./lib/clang/22/include/avx512fintrin.h":260:3)
+#loc118 = loc("./lib/clang/22/include/avx512fintrin.h":3583:1)
+#loc119 = loc("./lib/clang/22/include/avx512fintrin.h":3590:1)
+#loc124 = loc("./lib/clang/22/include/avx512fintrin.h":3585:1)
+#loc125 = loc("./lib/clang/22/include/avx512fintrin.h":3586:63)
+#loc126 = loc("./lib/clang/22/include/avx512fintrin.h":3587:28)
+#loc127 = loc("./lib/clang/22/include/avx512fintrin.h":3588:30)
+#loc128 = loc("./lib/clang/22/include/avx512fintrin.h":3589:18)
+#loc129 = loc("./lib/clang/22/include/avx512fintrin.h":3586:19)
+#loc130 = loc("./lib/clang/22/include/avx512fintrin.h":3586:3)
+#loc131 = loc("./lib/clang/22/include/avx512fintrin.h":3589:42)
+#loc132 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:1)
+#loc133 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":185:1)
+#loc138 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:56)
+#loc139 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:32)
+#loc140 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:35)
+#loc141 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:10)
+#loc142 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:3)
+#loc143 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:36)
+#loc144 = loc(fused[#loc1, #loc2])
+#loc148 = loc(fused[#loc14, #loc15])
+#loc149 = loc(fused[#loc16, #loc17])
+#loc153 = loc(fused[#loc29, #loc30])
+#loc154 = loc(fused[#loc31, #loc32])
+#loc158 = loc(fused[#loc45, #loc46])
+#loc159 = loc(fused[#loc47, #loc48])
+#loc160 = loc(fused[#loc49, #loc50])
+#loc161 = loc(fused[#loc51, #loc50])
+#loc162 = loc(fused[#loc53, #loc50])
+#loc163 = loc(fused[#loc54, #loc55])
+#loc166 = loc(fused[#loc65, #loc66])
+#loc167 = loc(fused[#loc67, #loc68])
+#loc170 = loc(fused[#loc77, #loc78])
+#loc171 = loc(fused[#loc79, #loc80])
+#loc172 = loc(fused[#loc81, #loc82])
+#loc173 = loc(fused[#loc83, #loc82])
+#loc174 = loc(fused[#loc85, #loc82])
+#loc175 = loc(fused[#loc86, #loc87])
+#loc178 = loc(fused[#loc97, #loc98])
+#loc179 = loc(fused[#loc99, #loc100])
+#loc182 = loc(fused[#loc109, #loc110])
+#loc183 = loc(fused[#loc111, #loc112])
+#loc184 = loc(fused[#loc113, #loc114])
+#loc185 = loc(fused[#loc115, #loc114])
+#loc186 = loc(fused[#loc117, #loc114])
+#loc187 = loc(fused[#loc118, #loc119])
+#loc190 = loc(fused[#loc130, #loc131])
+#loc191 = loc(fused[#loc132, #loc133])
+#loc194 = loc(fused[#loc142, #loc143])
>From 284c8750554ef09d2f919e30ef2c82d8289ef493 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 20:29:57 +0530
Subject: [PATCH 05/16] Delete
clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
---
.../X86/avx512f16c-builtins.cir | 393 ------------------
1 file changed, 393 deletions(-)
delete mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
deleted file mode 100644
index 9364d531b3585..0000000000000
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
+++ /dev/null
@@ -1,393 +0,0 @@
-!s16i = !cir.int<s, 16>
-!s32i = !cir.int<s, 32>
-!s64i = !cir.int<s, 64>
-!u16i = !cir.int<u, 16>
-!u8i = !cir.int<u, 8>
-#loc3 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:28)
-#loc4 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:36)
-#loc5 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:39)
-#loc6 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:46)
-#loc7 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:51)
-#loc8 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:60)
-#loc18 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:31)
-#loc19 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:39)
-#loc20 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:42)
-#loc21 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:49)
-#loc22 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:54)
-#loc23 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:63)
-#loc33 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:31)
-#loc34 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:39)
-#loc35 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:42)
-#loc36 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:49)
-#loc37 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:54)
-#loc38 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:64)
-#loc56 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:21)
-#loc57 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:30)
-#loc58 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:35)
-#loc59 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:43)
-#loc69 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:29)
-#loc70 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:37)
-#loc71 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:40)
-#loc72 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:49)
-#loc88 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:24)
-#loc89 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:33)
-#loc90 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:38)
-#loc91 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:46)
-#loc101 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:32)
-#loc102 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:40)
-#loc103 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:43)
-#loc104 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:52)
-#loc120 = loc("./lib/clang/22/include/avx512fintrin.h":3584:24)
-#loc121 = loc("./lib/clang/22/include/avx512fintrin.h":3584:34)
-#loc122 = loc("./lib/clang/22/include/avx512fintrin.h":3584:39)
-#loc123 = loc("./lib/clang/22/include/avx512fintrin.h":3584:47)
-#loc134 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:32)
-#loc135 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:40)
-#loc136 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:43)
-#loc137 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:53)
-#loc145 = loc(fused[#loc3, #loc4])
-#loc146 = loc(fused[#loc5, #loc6])
-#loc147 = loc(fused[#loc7, #loc8])
-#loc150 = loc(fused[#loc18, #loc19])
-#loc151 = loc(fused[#loc20, #loc21])
-#loc152 = loc(fused[#loc22, #loc23])
-#loc155 = loc(fused[#loc33, #loc34])
-#loc156 = loc(fused[#loc35, #loc36])
-#loc157 = loc(fused[#loc37, #loc38])
-#loc164 = loc(fused[#loc56, #loc57])
-#loc165 = loc(fused[#loc58, #loc59])
-#loc168 = loc(fused[#loc69, #loc70])
-#loc169 = loc(fused[#loc71, #loc72])
-#loc176 = loc(fused[#loc88, #loc89])
-#loc177 = loc(fused[#loc90, #loc91])
-#loc180 = loc(fused[#loc101, #loc102])
-#loc181 = loc(fused[#loc103, #loc104])
-#loc188 = loc(fused[#loc120, #loc121])
-#loc189 = loc(fused[#loc122, #loc123])
-#loc192 = loc(fused[#loc134, #loc135])
-#loc193 = loc(fused[#loc136, #loc137])
-module @"/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c" attributes {cir.lang = #cir.lang<c>, cir.module_asm = [], cir.triple = "x86_64-unknown-linux", dlti.dl_spec = #dlti.dl_spec<!llvm.ptr<270> = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array<i32: 8, 16, 32, 64>, "dlti.stack_alignment" = 128 : i64>} {
- cir.func no_inline dso_local @test_vcvtph2ps_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc3, #loc4]), %arg1: !cir.vector<4 x !cir.float> loc(fused[#loc5, #loc6]), %arg2: !u8i loc(fused[#loc7, #loc8])) -> !cir.vector<4 x !cir.float> {
- %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc145)
- %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["src", init] {alignment = 16 : i64} loc(#loc146)
- %2 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc147)
- %3 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc2)
- cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc9)
- cir.store %arg1, %1 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc9)
- cir.store %arg2, %2 : !u8i, !cir.ptr<!u8i> loc(#loc9)
- %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc10)
- %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc10)
- %6 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc11)
- %7 = cir.load align(1) %2 : !cir.ptr<!u8i>, !u8i loc(#loc12)
- %8 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc13)
- %9 = cir.vec.shuffle(%5, %8 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc13)
- %10 = cir.cast bitcast %9 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc13)
- %11 = cir.cast floating %10 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc13)
- %12 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc12)
- %13 = cir.vec.shuffle(%12, %12 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc13)
- %14 = cir.select if %13 then %11 else %6 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc13)
- cir.store %14, %3 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc148)
- %15 = cir.load %3 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc148)
- cir.return %15 : !cir.vector<4 x !cir.float> loc(#loc148)
- } loc(#loc144)
- cir.func no_inline dso_local @test_vcvtph2ps256_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc18, #loc19]), %arg1: !cir.vector<8 x !cir.float> loc(fused[#loc20, #loc21]), %arg2: !u8i loc(fused[#loc22, #loc23])) -> !cir.vector<8 x !cir.float> {
- %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc150)
- %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["src", init] {alignment = 32 : i64} loc(#loc151)
- %2 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc152)
- %3 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc17)
- cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc24)
- cir.store %arg1, %1 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc24)
- cir.store %arg2, %2 : !u8i, !cir.ptr<!u8i> loc(#loc24)
- %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc25)
- %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc25)
- %6 = cir.load align(32) %1 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc26)
- %7 = cir.load align(1) %2 : !cir.ptr<!u8i>, !u8i loc(#loc27)
- %8 = cir.cast bitcast %5 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc25)
- %9 = cir.cast floating %8 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc25)
- %10 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc27)
- %11 = cir.select if %10 then %9 else %6 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc28)
- cir.store %11, %3 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc153)
- %12 = cir.load %3 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc153)
- cir.return %12 : !cir.vector<8 x !cir.float> loc(#loc153)
- } loc(#loc149)
- cir.func no_inline dso_local @test_vcvtph2ps512_mask(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc33, #loc34]), %arg1: !cir.vector<16 x !cir.float> loc(fused[#loc35, #loc36]), %arg2: !u16i loc(fused[#loc37, #loc38])) -> !cir.vector<16 x !cir.float> {
- %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["a", init] {alignment = 32 : i64} loc(#loc155)
- %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["src", init] {alignment = 64 : i64} loc(#loc156)
- %2 = cir.alloca !u16i, !cir.ptr<!u16i>, ["k", init] {alignment = 2 : i64} loc(#loc157)
- %3 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc32)
- cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc39)
- cir.store %arg1, %1 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc39)
- cir.store %arg2, %2 : !u16i, !cir.ptr<!u16i> loc(#loc39)
- %4 = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc40)
- %5 = cir.cast bitcast %4 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc40)
- %6 = cir.load align(64) %1 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc41)
- %7 = cir.load align(2) %2 : !cir.ptr<!u16i>, !u16i loc(#loc42)
- %8 = cir.const #cir.int<4> : !s32i loc(#loc43)
- %9 = cir.cast bitcast %5 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc40)
- %10 = cir.cast floating %9 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc40)
- %11 = cir.cast bitcast %7 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc42)
- %12 = cir.select if %11 then %10 else %6 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc44)
- cir.store %12, %3 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc158)
- %13 = cir.load %3 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc158)
- cir.return %13 : !cir.vector<16 x !cir.float> loc(#loc158)
- } loc(#loc154)
- cir.func always_inline internal private dso_local @_mm_setzero_ps() -> !cir.vector<4 x !cir.float> {
- %0 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc48)
- %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, [".compoundliteral"] {alignment = 16 : i64} loc(#loc160)
- %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<4 x !cir.float> loc(#loc161)
- cir.store align(16) %2, %1 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc52)
- %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc49)
- cir.store %3, %0 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc162)
- %4 = cir.load %0 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc162)
- cir.return %4 : !cir.vector<4 x !cir.float> loc(#loc162)
- } loc(#loc159)
- cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc56, #loc57]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc58, #loc59])) -> !cir.vector<4 x !cir.float> {
- %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc164)
- %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__A", init] {alignment = 16 : i64} loc(#loc165)
- %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc55)
- cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc60)
- cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc60)
- %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc61)
- %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc61)
- %5 = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> loc(#loc62)
- %6 = cir.load align(1) %0 : !cir.ptr<!u8i>, !u8i loc(#loc63)
- %7 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc64)
- %8 = cir.vec.shuffle(%4, %7 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc64)
- %9 = cir.cast bitcast %8 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc64)
- %10 = cir.cast floating %9 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc64)
- %11 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc63)
- %12 = cir.vec.shuffle(%11, %11 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc64)
- %13 = cir.select if %12 then %10 else %5 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc64)
- cir.store %13, %2 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc166)
- %14 = cir.load %2 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc166)
- cir.return %14 : !cir.vector<4 x !cir.float> loc(#loc166)
- } loc(#loc163)
- cir.func no_inline dso_local @test_vcvtph2ps_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc69, #loc70]), %arg1: !u8i loc(fused[#loc71, #loc72])) -> !cir.vector<4 x !cir.float> {
- %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc168)
- %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc169)
- %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc68)
- cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc73)
- cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc73)
- %3 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc74)
- %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc75)
- %5 = cir.call @_mm_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<4 x !cir.float> loc(#loc76)
- cir.store %5, %2 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc170)
- %6 = cir.load %2 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc170)
- cir.return %6 : !cir.vector<4 x !cir.float> loc(#loc170)
- } loc(#loc167)
- cir.func always_inline internal private dso_local @_mm256_setzero_ps() -> !cir.vector<8 x !cir.float> {
- %0 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc80)
- %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, [".compoundliteral"] {alignment = 32 : i64} loc(#loc172)
- %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<8 x !cir.float> loc(#loc173)
- cir.store align(32) %2, %1 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc84)
- %3 = cir.load align(32) %1 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc81)
- cir.store %3, %0 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc174)
- %4 = cir.load %0 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc174)
- cir.return %4 : !cir.vector<8 x !cir.float> loc(#loc174)
- } loc(#loc171)
- cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc88, #loc89]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc90, #loc91])) -> !cir.vector<8 x !cir.float> {
- %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc176)
- %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__A", init] {alignment = 16 : i64} loc(#loc177)
- %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc87)
- cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc92)
- cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc92)
- %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc93)
- %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc93)
- %5 = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float> loc(#loc94)
- %6 = cir.load align(1) %0 : !cir.ptr<!u8i>, !u8i loc(#loc95)
- %7 = cir.cast bitcast %4 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc93)
- %8 = cir.cast floating %7 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc93)
- %9 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc95)
- %10 = cir.select if %9 then %8 else %5 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc96)
- cir.store %10, %2 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc178)
- %11 = cir.load %2 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc178)
- cir.return %11 : !cir.vector<8 x !cir.float> loc(#loc178)
- } loc(#loc175)
- cir.func no_inline dso_local @test_vcvtph2ps256_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc101, #loc102]), %arg1: !u8i loc(fused[#loc103, #loc104])) -> !cir.vector<8 x !cir.float> {
- %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc180)
- %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc181)
- %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc100)
- cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc105)
- cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc105)
- %3 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc106)
- %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc107)
- %5 = cir.call @_mm256_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<8 x !cir.float> loc(#loc108)
- cir.store %5, %2 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc182)
- %6 = cir.load %2 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc182)
- cir.return %6 : !cir.vector<8 x !cir.float> loc(#loc182)
- } loc(#loc179)
- cir.func always_inline internal private dso_local @_mm512_setzero_ps() -> !cir.vector<16 x !cir.float> {
- %0 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc112)
- %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, [".compoundliteral"] {alignment = 64 : i64} loc(#loc184)
- %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<16 x !cir.float> loc(#loc185)
- cir.store align(64) %2, %1 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc116)
- %3 = cir.load align(64) %1 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc113)
- cir.store %3, %0 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc186)
- %4 = cir.load %0 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc186)
- cir.return %4 : !cir.vector<16 x !cir.float> loc(#loc186)
- } loc(#loc183)
- cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps(%arg0: !u16i loc(fused[#loc120, #loc121]), %arg1: !cir.vector<4 x !s64i> loc(fused[#loc122, #loc123])) -> !cir.vector<16 x !cir.float> {
- %0 = cir.alloca !u16i, !cir.ptr<!u16i>, ["__U", init] {alignment = 2 : i64} loc(#loc188)
- %1 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["__A", init] {alignment = 32 : i64} loc(#loc189)
- %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc119)
- cir.store %arg0, %0 : !u16i, !cir.ptr<!u16i> loc(#loc124)
- cir.store %arg1, %1 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc124)
- %3 = cir.load align(32) %1 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc125)
- %4 = cir.cast bitcast %3 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc125)
- %5 = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float> loc(#loc126)
- %6 = cir.load align(2) %0 : !cir.ptr<!u16i>, !u16i loc(#loc127)
- %7 = cir.const #cir.int<4> : !s32i loc(#loc128)
- %8 = cir.cast bitcast %4 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc125)
- %9 = cir.cast floating %8 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc125)
- %10 = cir.cast bitcast %6 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc127)
- %11 = cir.select if %10 then %9 else %5 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc129)
- cir.store %11, %2 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc190)
- %12 = cir.load %2 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc190)
- cir.return %12 : !cir.vector<16 x !cir.float> loc(#loc190)
- } loc(#loc187)
- cir.func no_inline dso_local @test_vcvtph2ps512_maskz(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc134, #loc135]), %arg1: !u16i loc(fused[#loc136, #loc137])) -> !cir.vector<16 x !cir.float> {
- %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["a", init] {alignment = 32 : i64} loc(#loc192)
- %1 = cir.alloca !u16i, !cir.ptr<!u16i>, ["k", init] {alignment = 2 : i64} loc(#loc193)
- %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc133)
- cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc138)
- cir.store %arg1, %1 : !u16i, !cir.ptr<!u16i> loc(#loc138)
- %3 = cir.load align(2) %1 : !cir.ptr<!u16i>, !u16i loc(#loc139)
- %4 = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc140)
- %5 = cir.call @_mm512_maskz_cvtph_ps(%3, %4) : (!u16i, !cir.vector<4 x !s64i>) -> !cir.vector<16 x !cir.float> loc(#loc141)
- cir.store %5, %2 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc194)
- %6 = cir.load %2 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc194)
- cir.return %6 : !cir.vector<16 x !cir.float> loc(#loc194)
- } loc(#loc191)
-} loc(#loc)
-#loc = loc("/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":0:0)
-#loc1 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:1)
-#loc2 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":36:1)
-#loc9 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:63)
-#loc10 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:48)
-#loc11 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:51)
-#loc12 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:56)
-#loc13 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:10)
-#loc14 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:3)
-#loc15 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:57)
-#loc16 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:1)
-#loc17 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":62:1)
-#loc24 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:66)
-#loc25 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:51)
-#loc26 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:54)
-#loc27 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:59)
-#loc28 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:10)
-#loc29 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:3)
-#loc30 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:60)
-#loc31 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:1)
-#loc32 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":89:1)
-#loc39 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:67)
-#loc40 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:52)
-#loc41 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:55)
-#loc42 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:60)
-#loc43 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:63)
-#loc44 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:10)
-#loc45 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:3)
-#loc46 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:64)
-#loc47 = loc("./lib/clang/22/include/xmmintrin.h":2017:1)
-#loc48 = loc("./lib/clang/22/include/xmmintrin.h":2020:1)
-#loc49 = loc("./lib/clang/22/include/xmmintrin.h":2019:24)
-#loc50 = loc("./lib/clang/22/include/xmmintrin.h":2019:57)
-#loc51 = loc("./lib/clang/22/include/xmmintrin.h":2019:32)
-#loc52 = loc("./lib/clang/22/include/xmmintrin.h":2018:1)
-#loc53 = loc("./lib/clang/22/include/xmmintrin.h":2019:3)
-#loc54 = loc("./lib/clang/22/include/avx512vlintrin.h":8026:1)
-#loc55 = loc("./lib/clang/22/include/avx512vlintrin.h":8033:1)
-#loc60 = loc("./lib/clang/22/include/avx512vlintrin.h":8028:1)
-#loc61 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:59)
-#loc62 = loc("./lib/clang/22/include/avx512vlintrin.h":8031:14)
-#loc63 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:25)
-#loc64 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:19)
-#loc65 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:3)
-#loc66 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:28)
-#loc67 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:1)
-#loc68 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":126:1)
-#loc73 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:52)
-#loc74 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:29)
-#loc75 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:32)
-#loc76 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:10)
-#loc77 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:3)
-#loc78 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:33)
-#loc79 = loc("./lib/clang/22/include/avxintrin.h":4291:1)
-#loc80 = loc("./lib/clang/22/include/avxintrin.h":4293:1)
-#loc81 = loc("./lib/clang/22/include/avxintrin.h":4292:24)
-#loc82 = loc("./lib/clang/22/include/avxintrin.h":4292:81)
-#loc83 = loc("./lib/clang/22/include/avxintrin.h":4292:32)
-#loc84 = loc("./lib/clang/22/include/avxintrin.h":4291:53)
-#loc85 = loc("./lib/clang/22/include/avxintrin.h":4292:3)
-#loc86 = loc("./lib/clang/22/include/avx512vlintrin.h":8043:1)
-#loc87 = loc("./lib/clang/22/include/avx512vlintrin.h":8050:1)
-#loc92 = loc("./lib/clang/22/include/avx512vlintrin.h":8045:1)
-#loc93 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:62)
-#loc94 = loc("./lib/clang/22/include/avx512vlintrin.h":8048:17)
-#loc95 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:28)
-#loc96 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:19)
-#loc97 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:3)
-#loc98 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:31)
-#loc99 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:1)
-#loc100 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":156:1)
-#loc105 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:55)
-#loc106 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:33)
-#loc107 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:36)
-#loc108 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:11)
-#loc109 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:4)
-#loc110 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:37)
-#loc111 = loc("./lib/clang/22/include/avx512fintrin.h":259:1)
-#loc112 = loc("./lib/clang/22/include/avx512fintrin.h":262:1)
-#loc113 = loc("./lib/clang/22/include/avx512fintrin.h":260:23)
-#loc114 = loc("./lib/clang/22/include/avx512fintrin.h":261:78)
-#loc115 = loc("./lib/clang/22/include/avx512fintrin.h":260:31)
-#loc116 = loc("./lib/clang/22/include/avx512fintrin.h":259:56)
-#loc117 = loc("./lib/clang/22/include/avx512fintrin.h":260:3)
-#loc118 = loc("./lib/clang/22/include/avx512fintrin.h":3583:1)
-#loc119 = loc("./lib/clang/22/include/avx512fintrin.h":3590:1)
-#loc124 = loc("./lib/clang/22/include/avx512fintrin.h":3585:1)
-#loc125 = loc("./lib/clang/22/include/avx512fintrin.h":3586:63)
-#loc126 = loc("./lib/clang/22/include/avx512fintrin.h":3587:28)
-#loc127 = loc("./lib/clang/22/include/avx512fintrin.h":3588:30)
-#loc128 = loc("./lib/clang/22/include/avx512fintrin.h":3589:18)
-#loc129 = loc("./lib/clang/22/include/avx512fintrin.h":3586:19)
-#loc130 = loc("./lib/clang/22/include/avx512fintrin.h":3586:3)
-#loc131 = loc("./lib/clang/22/include/avx512fintrin.h":3589:42)
-#loc132 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:1)
-#loc133 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":185:1)
-#loc138 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:56)
-#loc139 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:32)
-#loc140 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:35)
-#loc141 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:10)
-#loc142 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:3)
-#loc143 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:36)
-#loc144 = loc(fused[#loc1, #loc2])
-#loc148 = loc(fused[#loc14, #loc15])
-#loc149 = loc(fused[#loc16, #loc17])
-#loc153 = loc(fused[#loc29, #loc30])
-#loc154 = loc(fused[#loc31, #loc32])
-#loc158 = loc(fused[#loc45, #loc46])
-#loc159 = loc(fused[#loc47, #loc48])
-#loc160 = loc(fused[#loc49, #loc50])
-#loc161 = loc(fused[#loc51, #loc50])
-#loc162 = loc(fused[#loc53, #loc50])
-#loc163 = loc(fused[#loc54, #loc55])
-#loc166 = loc(fused[#loc65, #loc66])
-#loc167 = loc(fused[#loc67, #loc68])
-#loc170 = loc(fused[#loc77, #loc78])
-#loc171 = loc(fused[#loc79, #loc80])
-#loc172 = loc(fused[#loc81, #loc82])
-#loc173 = loc(fused[#loc83, #loc82])
-#loc174 = loc(fused[#loc85, #loc82])
-#loc175 = loc(fused[#loc86, #loc87])
-#loc178 = loc(fused[#loc97, #loc98])
-#loc179 = loc(fused[#loc99, #loc100])
-#loc182 = loc(fused[#loc109, #loc110])
-#loc183 = loc(fused[#loc111, #loc112])
-#loc184 = loc(fused[#loc113, #loc114])
-#loc185 = loc(fused[#loc115, #loc114])
-#loc186 = loc(fused[#loc117, #loc114])
-#loc187 = loc(fused[#loc118, #loc119])
-#loc190 = loc(fused[#loc130, #loc131])
-#loc191 = loc(fused[#loc132, #loc133])
-#loc194 = loc(fused[#loc142, #loc143])
>From 416199137834e287ad333adfb5181e5941f71826 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 16:28:44 +0000
Subject: [PATCH 06/16] Update Test
---
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 43 ++++++++++++-------
1 file changed, 28 insertions(+), 15 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index ee42f5de48d98..35fe714fea626 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -8,11 +8,17 @@
#include <immintrin.h>
__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
- // CIR-LABEL: test_vcvtph2ps_mask
- // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
- // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
- // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
+ // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC_I]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
+ // CIR: cir.select if %[[FINAL_MASK]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps_mask
// LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -36,11 +42,15 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
}
__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
- // CIR-LABEL: test_vcvtph2ps256_mask
- // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
+ // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
// CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+ // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps256_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -62,12 +72,15 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
}
__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
- // CIR-LABEL: test_vcvtph2ps512_mask
- // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
- // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool>
- // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}}
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
+ // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.bool>
+ // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps512_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
>From b91283786ee77431883b0c948a2ffa0bbb773c40 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 16:33:42 +0000
Subject: [PATCH 07/16] Update Test
---
clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 35fe714fea626..a376e792005c5 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -8,7 +8,7 @@
#include <immintrin.h>
__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
- // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
// CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
>From dc57ff39f224e634b02abcea2db7488725ee00cb Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 16:59:17 +0000
Subject: [PATCH 08/16] Update test
---
.../CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index a376e792005c5..680b37abb5436 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,6 +9,7 @@
__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
+ // CIR: cir.store {{.*}} : !u8i, !cir.ptr<!u8i>
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
// CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
@@ -43,6 +44,7 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
+ // CIR: cir.store {{.*}} : !u8i, !cir.ptr<!u8i>
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
// CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
@@ -73,6 +75,7 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
+ // CIR: cir.store {{.*}} : !u16i, !cir.ptr<!u16i>
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
// CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
@@ -102,7 +105,7 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
}
__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
- // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps
+ // CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps
// CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
// CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
@@ -114,7 +117,7 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
// CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
- // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
+ // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz
// CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
// LLVM-LABEL: @test_vcvtph2ps_maskz
@@ -139,14 +142,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
}
__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
- // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps
+ // CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps
// CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
// CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
// CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
// CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+ // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz
// CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}})
@@ -169,14 +172,14 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
}
__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
- // CIR-LABEL: cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps
+ // CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps
// CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
// CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
// CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
// CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
// CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+ // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz
// CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
// LLVM-LABEL: @test_vcvtph2ps512_maskz
>From 1c6c877243dab12efaebcf56f4089ef55ee82683 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 17:45:51 +0000
Subject: [PATCH 09/16] Update Test
---
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 97 +++++++++----------
1 file changed, 45 insertions(+), 52 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 680b37abb5436..0c8960f5a8431 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,50 +9,45 @@
__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
- // CIR: cir.store {{.*}} : !u8i, !cir.ptr<!u8i>
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
// CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC_I]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
- // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
- // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
- // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
- // CIR: cir.select if %[[FINAL_MASK]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
-
- // LLVM-LABEL: @test_vcvtph2ps_mask
- // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
- // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
- // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
- // LLVM: ret <4 x float> {{.*}}
-
- // OGCG-LABEL: @test_vcvtph2ps_mask
- // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
- // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
- // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
- // OGCG: ret <4 x float> {{.*}}
+ // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+
+ // LLVM-LABEL: @test_vcvtph2ps_mask
+ // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+ // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+ // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // LLVM: ret <4 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps_mask
+ // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+ // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+ // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // OGCG: ret <4 x float> {{.*}}
typedef short __v8hi __attribute__((__vector_size__(16)));
return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
}
__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
- // CIR: cir.store {{.*}} : !u8i, !cir.ptr<!u8i>
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
// CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
- // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps256_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -75,15 +70,14 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
- // CIR: cir.store {{.*}} : !u16i, !cir.ptr<!u16i>
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
- // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
// CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.bool>
- // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps512_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
@@ -106,16 +100,12 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps
- // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
// CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
- // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
- // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
- // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
- // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
+ // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[ZERO]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
// CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz
// CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
@@ -143,11 +133,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
// CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps
- // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
// CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
// CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz
// CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}})
@@ -173,11 +166,11 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
// CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps
- // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
- // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
// CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
// CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
// CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz
// CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
>From ab226a73182af66ee130ef3477b583d38ddb5e54 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 19:00:41 +0000
Subject: [PATCH 10/16] Update test
---
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 84 +++++++++++--------
1 file changed, 47 insertions(+), 37 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 0c8960f5a8431..e1ce6475b66ff 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -8,14 +8,17 @@
#include <immintrin.h>
__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
- // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
- // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+ // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i>
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps_mask
// LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -41,13 +44,13 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
- // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
+ // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps256_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -71,13 +74,13 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
- // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
- // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
+ // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps512_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
@@ -99,13 +102,17 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
}
__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
- // CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
// CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
- // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[ZERO]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+ // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i>
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
// CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz
// CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
@@ -132,15 +139,15 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
}
__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
- // CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+ // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
- // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
+ // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
// CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz
// CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}})
@@ -165,12 +172,15 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
}
__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
- // CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+ // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
// CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
- // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
+ // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
// CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz
// CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
>From 47dfc767457b8c405b04e74cd80ab4e823d3cdfd Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Thu, 25 Dec 2025 19:38:21 +0000
Subject: [PATCH 11/16] Update test
---
.../CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index e1ce6475b66ff..6ec6e7447f72e 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -13,7 +13,8 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
// CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
// CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i>
+ // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
+ // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i>
// CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
// CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
// CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
@@ -107,7 +108,8 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
// CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
// CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i>
+ // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
+ // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i>
// CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
// CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
// CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
@@ -135,7 +137,8 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
// OGCG: ret <4 x float> {{.*}}
- return _mm_maskz_cvtph_ps(k, a);
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k);
}
__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
@@ -168,7 +171,8 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
// OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
// OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
// OGCG: ret <8 x float> {{.*}}
- return _mm256_maskz_cvtph_ps(k, a);
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
}
__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
@@ -200,5 +204,6 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
// OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
// OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
// OGCG: ret <16 x float> {{.*}}
- return _mm512_maskz_cvtph_ps(k, a);
+ typedef short __v16hi __attribute__((__vector_size__(32)));
+ return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4);
}
>From be0c47638032ced5344ea7e36399ce64283e9fca Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Fri, 26 Dec 2025 03:58:51 +0000
Subject: [PATCH 12/16] Update test
---
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 80 ++++++++-----------
1 file changed, 35 insertions(+), 45 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 6ec6e7447f72e..f5140502595d9 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -14,12 +14,12 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
// CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
// CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
- // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i>
+ // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
// CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
// CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[FINAL_MASK]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps_mask
// LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -50,8 +50,8 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
// CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
// CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
// CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps256_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -80,8 +80,8 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
// CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
// CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
// CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps512_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
@@ -104,20 +104,17 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
- // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
- // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
- // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i>
- // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
- // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
-
- // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz
- // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
+ // CIR: %[[Z_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[Z_CAST_A:.*]] = cir.cast bitcast %[[Z_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[Z_ZERO:.*]] = cir.call @_mm_setzero_ps()
+ // CIR: %[[Z_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[Z_POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
+ // CIR: %[[Z_NARROW_A:.*]] = cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+ // CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[Z_MASK_V:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_V]], %[[Z_MASK_V]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z_FIN_MASK]] {{.*}} %[[Z_FLOAT]] {{.*}} %[[Z_ZERO]]
// LLVM-LABEL: @test_vcvtph2ps_maskz
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -143,18 +140,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
- // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
- // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
-
- // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz
- // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}})
-
+ // CIR: %[[Z256_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[Z256_CAST_A:.*]] = cir.cast bitcast %[[Z256_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[Z256_ZERO:.*]] = cir.call @_mm256_setzero_ps()
+ // CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: %[[Z256_MASK_V:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z256_MASK_V]] {{.*}} %[[Z256_FLOAT]] {{.*}} %[[Z256_ZERO]]
// LLVM-LABEL: @test_vcvtph2ps256_maskz
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -177,17 +170,14 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
- // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
- // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
- // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
- // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
-
- // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz
- // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
+ // CIR: %[[Z512_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %[[Z512_CAST_A:.*]] = cir.cast bitcast %[[Z512_LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[Z512_ZERO:.*]] = cir.call @_mm512_setzero_ps()
+ // CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[Z512_MASK_V:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z512_MASK_V]] {{.*}} %[[Z512_FLOAT]] {{.*}} %[[Z512_ZERO]]
// LLVM-LABEL: @test_vcvtph2ps512_maskz
// LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
>From 168d3a1d2ed4076da58c363e2130789f3472661d Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Fri, 26 Dec 2025 05:06:39 +0000
Subject: [PATCH 13/16] Fix formatting
---
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 28 +++++++++----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index f5140502595d9..52e0e1c4298fe 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -17,9 +17,9 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
// CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
// CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
// CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[FINAL_MASK]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
+ // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_BIT]], %[[MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[FINAL_MASK]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps_mask
// LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -50,8 +50,8 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
// CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
// CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
// CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
+ // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps256_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -80,8 +80,8 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
// CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
// CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
// CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
+ // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
// LLVM-LABEL: @test_vcvtph2ps512_mask
// LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
@@ -112,9 +112,9 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// CIR: %[[Z_NARROW_A:.*]] = cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
// CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
// CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[Z_MASK_V:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_V]], %[[Z_MASK_V]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z_FIN_MASK]] {{.*}} %[[Z_FLOAT]] {{.*}} %[[Z_ZERO]]
+ // CIR: %[[Z_MASK_BIT:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_BIT]], %[[Z_MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z_FIN_MASK]]{{.*}}%[[Z_FLOAT]]{{.*}}%[[Z_ZERO]]
// LLVM-LABEL: @test_vcvtph2ps_maskz
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -146,8 +146,8 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
// CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
// CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
// CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[Z256_MASK_V:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z256_MASK_V]] {{.*}} %[[Z256_FLOAT]] {{.*}} %[[Z256_ZERO]]
+ // CIR: %[[Z256_MASK_BIT:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z256_MASK_BIT]]{{.*}}%[[Z256_FLOAT]]{{.*}}%[[Z256_ZERO]]
// LLVM-LABEL: @test_vcvtph2ps256_maskz
// LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -176,8 +176,8 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
// CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
// CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
// CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[Z512_MASK_V:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z512_MASK_V]] {{.*}} %[[Z512_FLOAT]] {{.*}} %[[Z512_ZERO]]
+ // CIR: %[[Z512_MASK_BIT:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z512_MASK_BIT]]{{.*}}%[[Z512_FLOAT]]{{.*}}%[[Z512_ZERO]]
// LLVM-LABEL: @test_vcvtph2ps512_maskz
// LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
>From f63bfe3376c7c7520b1ea4abfb26805d03f02ae8 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Fri, 26 Dec 2025 06:11:31 +0000
Subject: [PATCH 14/16] Update test
---
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 257 +++++++++---------
1 file changed, 129 insertions(+), 128 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 52e0e1c4298fe..1672e51a0f40e 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,190 +9,191 @@
__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
- // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
- // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
- // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
- // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
- // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_BIT]], %[[MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[FINAL_MASK]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
+ // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
+ // CIR: cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.const #cir.poison : !cir.vector<8 x !s16i>
+ // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !s16i>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}
// LLVM-LABEL: @test_vcvtph2ps_mask
- // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
- // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
- // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: bitcast <4 x i16> {{.*}} to <4 x half>
+ // LLVM: fpext <4 x half> {{.*}} to <4 x float>
+ // LLVM: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: icmp ne <4 x i1> {{.*}}, zeroinitializer
+ // LLVM: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
// LLVM: ret <4 x float> {{.*}}
// OGCG-LABEL: @test_vcvtph2ps_mask
- // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
- // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
- // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
- // OGCG: ret <4 x float> {{.*}}
+ // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: fpext <4 x half> {{.*}} to <4 x float>
+ // OGCG: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: icmp ne <4 x i1> {{.*}}, zeroinitializer
+ // OGCG: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
typedef short __v8hi __attribute__((__vector_size__(16)));
return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
}
__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
- // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
- // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
+ // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
+ // CIR: cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: cir.cast floating {{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps256_mask
- // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
- // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
- // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
- // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
- // LLVM: ret <8 x float> {{.*}}
+ // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: bitcast <8 x i16> {{.*}} to <8 x half>
+ // LLVM: fpext <8 x half> {{.*}} to <8 x float>
+ // LLVM: bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: icmp ne <8 x i1> {{.*}}, zeroinitializer
+ // LLVM: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
// OGCG-LABEL: @test_vcvtph2ps256_mask
- // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
- // OGCG: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
- // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
- // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
- // OGCG: ret <8 x float> {{.*}}
+ // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: bitcast <8 x i16> {{.*}} to <8 x half>
+ // OGCG: fpext <8 x half> {{.*}} to <8 x float>
+ // OGCG: bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: icmp ne <8 x i1> {{.*}}, zeroinitializer
+ // OGCG: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
typedef short __v8hi __attribute__((__vector_size__(16)));
return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
}
__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
- // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
- // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
- // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
- // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
+ // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
+ // CIR: cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps512_mask
- // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
- // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
- // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
- // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
- // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
- // LLVM: ret <16 x float> {{.*}}
+ // LLVM: bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: bitcast <16 x i16> {{.*}} to <16 x half>
+ // LLVM: fpext <16 x half> {{.*}} to <16 x float>
+ // LLVM: bitcast i16 {{.*}} to <16 x i1>
+ // LLVM: icmp ne <16 x i1> {{.*}}, zeroinitializer
+ // LLVM: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
// OGCG-LABEL: @test_vcvtph2ps512_mask
- // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
- // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
- // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
- // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
- // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
- // OGCG: ret <16 x float> {{.*}}
+ // OGCG: bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: bitcast <16 x i16> {{.*}} to <16 x half>
+ // OGCG: fpext <16 x half> {{.*}} to <16 x float>
+ // OGCG: bitcast i16 {{.*}} to <16 x i1>
+ // OGCG: icmp ne <16 x i1> {{.*}}, zeroinitializer
+ // OGCG: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
typedef short __v16hi __attribute__((__vector_size__(32)));
return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
}
__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
- // CIR: %[[Z_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[Z_CAST_A:.*]] = cir.cast bitcast %[[Z_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[Z_ZERO:.*]] = cir.call @_mm_setzero_ps()
- // CIR: %[[Z_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[Z_POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
- // CIR: %[[Z_NARROW_A:.*]] = cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
- // CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
- // CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: %[[Z_MASK_BIT:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_BIT]], %[[Z_MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z_FIN_MASK]]{{.*}}%[[Z_FLOAT]]{{.*}}%[[Z_ZERO]]
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
+ // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !s16i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps_maskz
- // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
- // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
- // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+ // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
+ // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
+ // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}}
// LLVM: ret <4 x float> {{.*}}
// OGCG-LABEL: @test_vcvtph2ps_maskz
- // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
- // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
- // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+ // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
+ // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
+ // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}}
// OGCG: ret <4 x float> {{.*}}
-
typedef short __v8hi __attribute__((__vector_size__(16)));
return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k);
}
__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
- // CIR: %[[Z256_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %[[Z256_CAST_A:.*]] = cir.cast bitcast %[[Z256_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %[[Z256_ZERO:.*]] = cir.call @_mm256_setzero_ps()
- // CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: %[[Z256_MASK_BIT:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z256_MASK_BIT]]{{.*}}%[[Z256_FLOAT]]{{.*}}%[[Z256_ZERO]]
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %{{.*}} = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps256_maskz
- // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
- // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
- // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
- // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
- // LLVM: ret <8 x float> {{.*}}
+ // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
+ // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
+ // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
+ // LLVM: ret <8 x float> {{.*}}
// OGCG-LABEL: @test_vcvtph2ps256_maskz
- // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
- // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
- // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
- // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
- // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+ // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
+ // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
+ // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
// OGCG: ret <8 x float> {{.*}}
- typedef short __v8hi __attribute__((__vector_size__(16)));
- return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
}
__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
- // CIR: %[[Z512_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
- // CIR: %[[Z512_CAST_A:.*]] = cir.cast bitcast %[[Z512_LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
- // CIR: %[[Z512_ZERO:.*]] = cir.call @_mm512_setzero_ps()
- // CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: %[[Z512_MASK_BIT:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z512_MASK_BIT]]{{.*}}%[[Z512_FLOAT]]{{.*}}%[[Z512_ZERO]]
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %{{.*}} = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps512_maskz
- // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
- // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
- // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
- // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
- // LLVM: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+ // LLVM: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
+ // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
+ // LLVM: %{{.*}} = bitcast i16 {{.*}} to <16 x i1>
+ // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}}
// LLVM: ret <16 x float> {{.*}}
-
+
// OGCG-LABEL: @test_vcvtph2ps512_maskz
- // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
- // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
- // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
- // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
- // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+ // OGCG: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
+ // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
+ // OGCG: %{{.*}} = bitcast i16 {{.*}} to <16 x i1>
+ // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}}
// OGCG: ret <16 x float> {{.*}}
typedef short __v16hi __attribute__((__vector_size__(32)));
return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4);
>From 964139c73bdc2f966ffb9ad5197e71b4e27f4450 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Fri, 26 Dec 2025 07:28:29 +0000
Subject: [PATCH 15/16] Update test
---
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 224 ++++++++++--------
1 file changed, 127 insertions(+), 97 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 1672e51a0f40e..f26e0d5d5ccae 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,95 +9,113 @@
__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
- // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
- // CIR: cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: cir.const #cir.poison : !cir.vector<8 x !s16i>
- // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !s16i>
- // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
- // CIR: cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
- // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
+ // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps_mask
- // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16>
- // LLVM: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: bitcast <4 x i16> {{.*}} to <4 x half>
- // LLVM: fpext <4 x half> {{.*}} to <4 x float>
- // LLVM: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: icmp ne <4 x i1> {{.*}}, zeroinitializer
- // LLVM: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
- // LLVM: ret <4 x float> {{.*}}
-
+ // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // LLVM: %{{.*}} = load <4 x float>, ptr %{{.*}}
+ // LLVM: %{{.*}} = load i8, ptr %{{.*}}
+ // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
+ // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
+ // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ // LLVM: ret <4 x float> %{{.*}}
+
// OGCG-LABEL: @test_vcvtph2ps_mask
- // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16>
- // OGCG: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: fpext <4 x half> {{.*}} to <4 x float>
- // OGCG: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: icmp ne <4 x i1> {{.*}}, zeroinitializer
- // OGCG: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
+ // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // OGCG: %{{.*}} = load <4 x float>, ptr %{{.*}}
+ // OGCG: %{{.*}} = load i8, ptr %{{.*}}
+ // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
+ // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
+ // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+
typedef short __v8hi __attribute__((__vector_size__(16)));
return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
}
__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
- // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
- // CIR: cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR: cir.cast floating {{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
- // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps256_mask
- // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16>
- // LLVM: bitcast <8 x i16> {{.*}} to <8 x half>
- // LLVM: fpext <8 x half> {{.*}} to <8 x float>
- // LLVM: bitcast i8 {{.*}} to <8 x i1>
- // LLVM: icmp ne <8 x i1> {{.*}}, zeroinitializer
- // LLVM: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
+ // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // LLVM: %{{.*}} = load <8 x float>, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
+ // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
+ // LLVM: %{{.*}} = load i8, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps256_mask
- // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16>
- // OGCG: bitcast <8 x i16> {{.*}} to <8 x half>
- // OGCG: fpext <8 x half> {{.*}} to <8 x float>
- // OGCG: bitcast i8 {{.*}} to <8 x i1>
- // OGCG: icmp ne <8 x i1> {{.*}}, zeroinitializer
- // OGCG: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
+ // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // OGCG: %{{.*}} = load <8 x float>, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
+ // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
+ // OGCG: %{{.*}} = load i8, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+
typedef short __v8hi __attribute__((__vector_size__(16)));
return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
}
__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
- // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
- // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
- // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
- // CIR: cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR: cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
- // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
+ // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps512_mask
- // LLVM: bitcast <4 x i64> {{.*}} to <16 x i16>
- // LLVM: bitcast <16 x i16> {{.*}} to <16 x half>
- // LLVM: fpext <16 x half> {{.*}} to <16 x float>
- // LLVM: bitcast i16 {{.*}} to <16 x i1>
- // LLVM: icmp ne <16 x i1> {{.*}}, zeroinitializer
- // LLVM: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
+ // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
+ // LLVM: %{{.*}} = load <16 x float>, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
+ // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
+ // LLVM: %{{.*}} = load i16, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+ // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps512_mask
- // OGCG: bitcast <4 x i64> {{.*}} to <16 x i16>
- // OGCG: bitcast <16 x i16> {{.*}} to <16 x half>
- // OGCG: fpext <16 x half> {{.*}} to <16 x float>
- // OGCG: bitcast i16 {{.*}} to <16 x i1>
- // OGCG: icmp ne <16 x i1> {{.*}}, zeroinitializer
- // OGCG: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
+ // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
+ // OGCG: %{{.*}} = load <16 x float>, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
+ // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
+ // OGCG: %{{.*}} = load i16, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+ // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+
typedef short __v16hi __attribute__((__vector_size__(32)));
return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
}
@@ -109,32 +127,36 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float>
// CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
// CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
- // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !s16i>
+ // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
// CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
+ // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps_maskz
- // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // LLVM: %{{.*}} = load i8, ptr %{{.*}}
// LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
// LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
- // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
- // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}}
- // LLVM: ret <4 x float> {{.*}}
+ // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ // LLVM: ret <4 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps_maskz
- // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+ // OGCG: %{{.*}} = load i8, ptr %{{.*}}
// OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
// OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
- // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
- // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}}
- // OGCG: ret <4 x float> {{.*}}
+ // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+
typedef short __v8hi __attribute__((__vector_size__(16)));
return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k);
}
@@ -148,23 +170,27 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
// CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps256_maskz
- // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
// LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
// LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
- // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
- // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
- // LLVM: ret <8 x float> {{.*}}
+ // LLVM: %{{.*}} = load i8, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+ // LLVM: ret <8 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps256_maskz
- // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
// OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
// OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
- // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
- // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
- // OGCG: ret <8 x float> {{.*}}
+ // OGCG: %{{.*}} = load i8, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+
typedef short __v8hi __attribute__((__vector_size__(16)));
return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
}
@@ -178,23 +204,27 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
// CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float>
// LLVM-LABEL: @test_vcvtph2ps512_maskz
- // LLVM: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
// LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
// LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
- // LLVM: %{{.*}} = bitcast i16 {{.*}} to <16 x i1>
- // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}}
- // LLVM: ret <16 x float> {{.*}}
+ // LLVM: %{{.*}} = load i16, ptr %{{.*}}
+ // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+ // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+ // LLVM: ret <16 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps512_maskz
- // OGCG: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
// OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
// OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
- // OGCG: %{{.*}} = bitcast i16 {{.*}} to <16 x i1>
- // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}}
- // OGCG: ret <16 x float> {{.*}}
+ // OGCG: %{{.*}} = load i16, ptr %{{.*}}
+ // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+ // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+
typedef short __v16hi __attribute__((__vector_size__(32)));
return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4);
}
>From 9a999f526c9fdc21ebcb2ee7c1718b49096a48cc Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Sat, 27 Dec 2025 14:35:47 +0000
Subject: [PATCH 16/16] Update test
---
.../CodeGenBuiltins/X86/avx512f16c-builtins.c | 93 ++-----------------
1 file changed, 9 insertions(+), 84 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index f26e0d5d5ccae..8ce29b57de275 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,36 +9,25 @@
__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
// CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps_mask
- // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
// LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
- // LLVM: %{{.*}} = load <4 x float>, ptr %{{.*}}
- // LLVM: %{{.*}} = load i8, ptr %{{.*}}
// LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
// LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
// LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
// LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
- // LLVM: ret <4 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps_mask
- // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
// OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
- // OGCG: %{{.*}} = load <4 x float>, ptr %{{.*}}
- // OGCG: %{{.*}} = load i8, ptr %{{.*}}
// OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
// OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
@@ -52,32 +41,20 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
// CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps256_mask
- // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
- // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
- // LLVM: %{{.*}} = load <8 x float>, ptr %{{.*}}
// LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
// LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
- // LLVM: %{{.*}} = load i8, ptr %{{.*}}
// LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
// LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps256_mask
- // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
- // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
- // OGCG: %{{.*}} = load <8 x float>, ptr %{{.*}}
// OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
// OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
- // OGCG: %{{.*}} = load i8, ptr %{{.*}}
// OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
// OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
@@ -87,32 +64,20 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
- // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
// CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps512_mask
- // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}}
- // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
- // LLVM: %{{.*}} = load <16 x float>, ptr %{{.*}}
// LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
// LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
- // LLVM: %{{.*}} = load i16, ptr %{{.*}}
// LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps512_mask
- // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}}
- // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
- // OGCG: %{{.*}} = load <16 x float>, ptr %{{.*}}
// OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
// OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
- // OGCG: %{{.*}} = load i16, ptr %{{.*}}
// OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
@@ -122,35 +87,20 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
- // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
+ // CIR: %{{.*}} = cir.call @_mm_setzero_ps()
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
- // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
- // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps_maskz
- // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
- // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
- // LLVM: %{{.*}} = load i8, ptr %{{.*}}
- // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
// LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
// LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
// LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
- // LLVM: ret <4 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps_maskz
- // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
- // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
- // OGCG: %{{.*}} = load i8, ptr %{{.*}}
- // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
// OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
// OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
@@ -163,31 +113,18 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
- // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
- // CIR: %{{.*}} = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %{{.*}} = cir.call @_mm256_setzero_ps()
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
- // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps256_maskz
- // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
- // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
- // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
// LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
- // LLVM: %{{.*}} = load i8, ptr %{{.*}}
// LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
// LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
- // LLVM: ret <8 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps256_maskz
- // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
- // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
- // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
// OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
- // OGCG: %{{.*}} = load i8, ptr %{{.*}}
// OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
// OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
@@ -197,31 +134,19 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
// CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
- // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
- // CIR: %{{.*}} = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float>
- // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
- // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
- // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %{{.*}} = cir.call @_mm512_setzero_ps()
// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
- // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float>
+ // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
// LLVM-LABEL: @test_vcvtph2ps512_maskz
- // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}}
- // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
// LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
// LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
- // LLVM: %{{.*}} = load i16, ptr %{{.*}}
// LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
- // LLVM: ret <16 x float> %{{.*}}
// OGCG-LABEL: @test_vcvtph2ps512_maskz
- // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}}
- // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
// OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
// OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
- // OGCG: %{{.*}} = load i16, ptr %{{.*}}
// OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
// OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
More information about the cfe-commits
mailing list