[clang] [CIR][AArch64] Lower NEON vbsl builtins (PR #188449)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 25 02:53:26 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clangir
Author: Jiahao Guo (E00N777)
<details>
<summary>Changes</summary>
Part of https://github.com/llvm/llvm-project/issues/185382
Lowering:
- test_vbsl_s8
- test_vbslq_s8
- test_vbsl_s16
- test_vbslq_s16
- test_vbsl_f32
- test_vbslq_f32
I reused the lowering logic from the [incubator](https://github.com/llvm/clangir/blob/main/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp) implementation and added a corresponding helper function in the upstream file, like this:
```
case NEON::BI__builtin_neon_vbsl_v:
case NEON::BI__builtin_neon_vbslq_v: {
cir::VectorType bitTy = vTy;
if (cir::isAnyFloatingPointType(bitTy.getElementType()))
bitTy = castVecOfFPTypeToVecOfIntWithSameWidth(builder, vTy);
Ops[0] = builder.createBitcast(Ops[0], bitTy);
Ops[1] = builder.createBitcast(Ops[1], bitTy);
Ops[2] = builder.createBitcast(Ops[2], bitTy);
Ops[1] = builder.createAnd(Ops[0], Ops[1]);
Ops[2] = builder.createAnd(builder.createNot(Ops[0]), Ops[2]);
Ops[0] = builder.createOr(Ops[1], Ops[2]);
return builder.createBitcast(Ops[0], ty);
}
```
and
```
static cir::VectorType
castVecOfFPTypeToVecOfIntWithSameWidth(CIRGenBuilderTy &builder,
cir::VectorType vecTy) {
if (mlir::isa<cir::SingleType>(vecTy.getElementType()))
return cir::VectorType::get(builder.getSInt32Ty(), vecTy.getSize());
if (mlir::isa<cir::DoubleType>(vecTy.getElementType()))
return cir::VectorType::get(builder.getSInt64Ty(), vecTy.getSize());
llvm_unreachable(
"Unsupported element type in getVecOfIntTypeWithSameEltWidth");
}
```
If this is not the preferred way to structure it, I’d be happy to adjust it based on your feedback.
For FileCheck coverage, I moved the relevant test cases from `clang/test/CodeGen/AArch64/neon-intrinsics.c` into `clang/test/CodeGen/AArch64/neon/intrinsics.c`.
I was not entirely sure whether the bitwise-select coverage should go into a separate dedicated test file, so for now I kept it in `clang/test/CodeGen/AArch64/neon/intrinsics.c`.
---
Patch is 20.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/188449.diff
3 Files Affected:
- (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp (+27-2)
- (modified) clang/test/CodeGen/AArch64/neon-intrinsics.c (-107)
- (modified) clang/test/CodeGen/AArch64/neon/intrinsics.c (+155)
``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index a3488bfcc3dec..3a0cc766478a3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -14,6 +14,7 @@
#include "CIRGenFunction.h"
#include "clang/Basic/AArch64CodeGenUtils.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
// TODO(cir): once all builtins are covered, decide whether we still
@@ -23,6 +24,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Value.h"
#include "clang/AST/GlobalDecl.h"
#include "clang/Basic/Builtins.h"
@@ -169,6 +171,17 @@ static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags typeFlags,
llvm_unreachable("Unknown vector element type!");
}
+static cir::VectorType
+castVecOfFPTypeToVecOfIntWithSameWidth(CIRGenBuilderTy &builder,
+ cir::VectorType vecTy) {
+ if (mlir::isa<cir::SingleType>(vecTy.getElementType()))
+ return cir::VectorType::get(builder.getSInt32Ty(),vecTy.getSize());
+ if (mlir::isa<cir::DoubleType>(vecTy.getElementType()))
+ return cir::VectorType::get(builder.getSInt64Ty(), vecTy.getSize());
+ llvm_unreachable(
+ "Unsupported element type in getVecOfIntTypeWithSameEltWidth");
+}
+
static mlir::Value emitCommonNeonBuiltinExpr(
CIRGenFunction &cgf, unsigned builtinID, unsigned llvmIntrinsic,
unsigned altLLVMIntrinsic, const char *nameHint, unsigned modifier,
@@ -1677,7 +1690,7 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
return mlir::Value{};
}
- // Memory Operations (MOPS)
+ // Memory Operations (Mops)
if (builtinID == AArch64::BI__builtin_arm_mops_memset_tag) {
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented AArch64 builtin call: ") +
@@ -2196,7 +2209,19 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
default:
return std::nullopt;
case NEON::BI__builtin_neon_vbsl_v:
- case NEON::BI__builtin_neon_vbslq_v:
+ case NEON::BI__builtin_neon_vbslq_v: {
+ cir::VectorType bitTy = ty;
+ if(cir::isAnyFloatingPointType(bitTy.getElementType()))
+ bitTy = castVecOfFPTypeToVecOfIntWithSameWidth(builder, bitTy);
+ ops[0] = builder.createBitcast(ops[0], bitTy);
+ ops[1] = builder.createBitcast(ops[1], bitTy);
+ ops[2] = builder.createBitcast(ops[2], bitTy);
+
+ ops[1] = builder.createAnd(loc, ops[0], ops[1]);
+ ops[2] = builder.createAnd(loc, builder.createNot(ops[0]), ops[2]);
+ ops[0] = builder.createOr(loc, ops[1], ops[2]);
+ return builder.createBitcast(ops[0], ty);
+ }
case NEON::BI__builtin_neon_vfma_lane_v:
case NEON::BI__builtin_neon_vfmaq_lane_v:
case NEON::BI__builtin_neon_vfma_laneq_v:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 8eb6cd86339d6..c01edc93267b7 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -1038,39 +1038,6 @@ float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
return vdiv_f32(v1, v2);
}
-// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_s8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VBSL_I:%.*]] = and <8 x i8> [[V1]], [[V2]]
-// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i8> [[V1]], splat (i8 -1)
-// CHECK-NEXT: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[V3]]
-// CHECK-NEXT: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
-// CHECK-NEXT: ret <8 x i8> [[VBSL2_I]]
-//
-int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
- return vbsl_s8(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_s16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
-// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
-// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], splat (i16 -1)
-// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
-// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
-// CHECK-NEXT: ret <8 x i8> [[TMP4]]
-//
-int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
- return (int8x8_t)vbsl_s16(v1, v2, v3);
-}
-
// CHECK-LABEL: define dso_local <2 x i32> @test_vbsl_s32(
// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -1179,28 +1146,6 @@ uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
return vbsl_u64(v1, v2, v3);
}
-// CHECK-LABEL: define dso_local <2 x float> @test_vbsl_f32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]], <2 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
-// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
-// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
-// CHECK-NEXT: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
-// CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[VBSL_I]], splat (i32 -1)
-// CHECK-NEXT: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP5]], [[VBSL2_I]]
-// CHECK-NEXT: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
-// CHECK-NEXT: ret <2 x float> [[TMP6]]
-//
-float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
- return vbsl_f32(v1, v2, v3);
-}
-
// CHECK-LABEL: define dso_local <1 x double> @test_vbsl_f64(
// CHECK-SAME: <1 x i64> noundef [[V1:%.*]], <1 x double> noundef [[V2:%.*]], <1 x double> noundef [[V3:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -1257,37 +1202,6 @@ poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
return vbsl_p16(v1, v2, v3);
}
-// CHECK-LABEL: define dso_local <16 x i8> @test_vbslq_s8(
-// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VBSL_I:%.*]] = and <16 x i8> [[V1]], [[V2]]
-// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[V1]], splat (i8 -1)
-// CHECK-NEXT: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[V3]]
-// CHECK-NEXT: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
-// CHECK-NEXT: ret <16 x i8> [[VBSL2_I]]
-//
-int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
- return vbslq_s8(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vbslq_s16(
-// CHECK-SAME: <8 x i16> noundef [[V1:%.*]], <8 x i16> noundef [[V2:%.*]], <8 x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
-// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
-// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], splat (i16 -1)
-// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
-// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK-NEXT: ret <8 x i16> [[VBSL5_I]]
-//
-int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
- return vbslq_s16(v1, v2, v3);
-}
// CHECK-LABEL: define dso_local <4 x i32> @test_vbslq_s32(
// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x i32> noundef [[V2:%.*]], <4 x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
@@ -1397,27 +1311,6 @@ uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
return vbslq_u64(v1, v2, v3);
}
-// CHECK-LABEL: define dso_local <4 x float> @test_vbslq_f32(
-// CHECK-SAME: <4 x i32> noundef [[V1:%.*]], <4 x float> noundef [[V2:%.*]], <4 x float> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
-// CHECK-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x i32>
-// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
-// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i32> [[VBSL_I]], [[VBSL1_I]]
-// CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[VBSL_I]], splat (i32 -1)
-// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP5]], [[VBSL2_I]]
-// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[TMP6]]
-//
-float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
- return vbslq_f32(v1, v2, v3);
-}
// CHECK-LABEL: define dso_local <16 x i8> @test_vbslq_p8(
// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index bf8e62feda8da..0375d3ab02647 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -982,3 +982,158 @@ int64_t test_vshld_u64(int64_t a,int64_t b) {
return (int64_t)vshld_u64(a, b);
}
+// LLVM-LABEL: @test_vbsl_s8(
+// CIR-LABEL: @vbsl_s8(
+int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
+ // CIR: [[MASK_PTR:%.*]] = cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<8 x !u8i>> -> !cir.ptr<!cir.vector<8 x !s8i>>
+ // CIR: [[AND:%.*]] = cir.and %{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>
+ // CIR: [[NOT:%.*]] = cir.not %{{.*}} : !cir.vector<8 x !s8i>
+ // CIR: [[AND2:%.*]] = cir.and [[NOT]], %{{.*}} : !cir.vector<8 x !s8i>
+ // CIR: [[RES:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<8 x !s8i>
+
+
+ // LLVM: [[VBSL_I:%.*]] = and <8 x i8> [[V1]], [[V2]]
+ // LLVM-NEXT: [[TMP0:%.*]] = xor <8 x i8> [[V1]], splat (i8 -1)
+ // LLVM-NEXT: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[V3]]
+ // LLVM-NEXT: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+ // LLVM-NEXT: ret <8 x i8> [[VBSL2_I]]
+ return vbsl_s8(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vbslq_s8(
+// CIR-LABEL: @vbslq_s8(
+int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<16 x !u8i>> -> !cir.ptr<!cir.vector<16 x !s8i>>
+ // CIR: [[AND:%.*]] = cir.and %{{.*}}, %{{.*}} : !cir.vector<16 x !s8i>
+ // CIR: [[NOT:%.*]] = cir.not %{{.*}} : !cir.vector<16 x !s8i>
+ // CIR: [[AND2:%.*]] = cir.and [[NOT]], %{{.*}} : !cir.vector<16 x !s8i>
+ // CIR: cir.or [[AND]], [[AND2]] : !cir.vector<16 x !s8i>
+
+ // LLVM: [[VBSL_I:%.*]] = and <16 x i8> [[V1]], [[V2]]
+ // LLVM-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[V1]], splat (i8 -1)
+ // LLVM-NEXT: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[V3]]
+ // LLVM-NEXT: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+ // LLVM-NEXT: ret <16 x i8> [[VBSL2_I]]
+ return vbslq_s8(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vbsl_s16(
+// CIR-LABEL: @vbsl_s16(
+int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !u16i>> -> !cir.ptr<!cir.vector<8 x !s8i>>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !s16i>> -> !cir.ptr<!cir.vector<8 x !s8i>>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !s16i>> -> !cir.ptr<!cir.vector<8 x !s8i>>
+ // CIR: [[MASK:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
+ // CIR: [[VAL1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
+ // CIR: [[VAL2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i>
+ // CIR: [[AND:%.*]] = cir.and [[MASK]], [[VAL1]] : !cir.vector<4 x !s16i>
+ // CIR: [[NOT:%.*]] = cir.not [[MASK]] : !cir.vector<4 x !s16i>
+ // CIR: [[AND2:%.*]] = cir.and [[NOT]], [[VAL2]] : !cir.vector<4 x !s16i>
+ // CIR: [[RES:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<4 x !s16i>
+
+ // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+ // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+ // LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+ // LLVM-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+ // LLVM-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+ // LLVM-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+ // LLVM-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[VBSL_I]], [[VBSL1_I]]
+ // LLVM-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[VBSL_I]], splat (i16 -1)
+ // LLVM-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+ // LLVM-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+ // LLVM-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
+ // LLVM-NEXT: ret <8 x i8> [[TMP4]]
+ return (int8x8_t)vbsl_s16(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vbslq_s16(
+// CIR-LABEL: @vbslq_s16(
+int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<8 x !u16i>> -> !cir.ptr<!cir.vector<16 x !s8i>>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<8 x !s16i>> -> !cir.ptr<!cir.vector<16 x !s8i>>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<8 x !s16i>> -> !cir.ptr<!cir.vector<16 x !s8i>>
+ // CIR: [[MASK:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
+ // CIR: [[VAL1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
+ // CIR: [[VAL2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
+ // CIR: [[AND:%.*]] = cir.and [[MASK]], [[VAL1]] : !cir.vector<8 x !s16i>
+ // CIR: [[NOT:%.*]] = cir.not [[MASK]] : !cir.vector<8 x !s16i>
+ // CIR: [[AND2:%.*]] = cir.and [[NOT]], [[VAL2]] : !cir.vector<8 x !s16i>
+ // CIR: [[RES:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<8 x !s16i>
+
+ // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V1]] to <16 x i8>
+ // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V2]] to <16 x i8>
+ // LLVM-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[V3]] to <16 x i8>
+ // LLVM-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+ // LLVM-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+ // LLVM-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+ // LLVM-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+ // LLVM-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[VBSL_I]], splat (i16 -1)
+ // LLVM-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+ // LLVM-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+ // LLVM-NEXT: ret <8 x i16> [[VBSL5_I]]
+ return vbslq_s16(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vbsl_f32(
+// CIR-LABEL: @vbsl_f32(
+float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<2 x !u32i>> -> !cir.ptr<!cir.vector<8 x !s8i>>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<2 x !cir.float>> -> !cir.ptr<!cir.vector<8 x !s8i>>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<2 x !cir.float>> -> !cir.ptr<!cir.vector<8 x !s8i>>
+ // CIR: [[MASK:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
+ // CIR: [[VAL1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
+ // CIR: [[VAL2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i>
+ // CIR: [[AND:%.*]] = cir.and [[MASK]], [[VAL1]] : !cir.vector<2 x !s32i>
+ // CIR: [[NOT:%.*]] = cir.not [[MASK]] : !cir.vector<2 x !s32i>
+ // CIR: [[AND2:%.*]] = cir.and [[NOT]], [[VAL2]] : !cir.vector<2 x !s32i>
+ // CIR: [[OR:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<2 x !s32i>
+ // CIR: cir.cast bitcast [[OR]] : !cir.vector<2 x !s32i> -> !cir.vector<2 x !cir.float>
+
+ // LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+ // LLVM-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[V3]] to <2 x i32>
+ // LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+ // LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+ // LLVM-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+ // LLVM-NEXT: [[VBSL_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
+ // LLVM-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
+ // LLVM-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+ // LLVM-NEXT: [[VBSL3_I:%.*]] = and <2 x i32> [[VBSL_I]], [[VBSL1_I]]
+ // LLVM-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[VBSL_I]], splat (i32 -1)
+ // LLVM-NEXT: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP5]], [[VBSL2_I]]
+ // LLVM-NEXT: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+ // LLVM-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
+ // LLVM-NEXT: ret <2 x float> [[TMP6]]
+ return vbsl_f32(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vbslq_f32(
+// CIR-LABEL: @vbslq_f32(
+float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !u32i>> -> !cir.ptr<!cir.vector<16 x !s8i>>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>> -> !cir.ptr<!cir.vector<16 x !s8i>>
+ // CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>> -> !cir.ptr<!cir.vector<16 x !s8i>>
+ // CIR: [[MASK:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i>
+ // CIR: [[VAL1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i>
+ // CIR: [[VAL2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i>
+ // CIR: [[AND:%.*]] = cir.and [[MASK]], [[VAL1]] : !cir.vector<4 x !s32i>
+ // CIR: [[NOT:%.*]] = cir.not [[MASK]] : !cir.vector<4 x !s32i>
+ // CIR: [[AND2:%.*]] = cir.and [[NOT]], [[VAL2]] : !cir.vector<4 x !s32i>
+ // CIR: [[OR:%.*]] = cir.or [[AND]], [[AND2]] : !cir.vector<4 x !s32i>
+ // CIR: cir.cast bitcast [[OR]] : !cir.vector<4 x !s32i> -> !cir.vector<4 x !cir.float>
+
+ // LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V2]] to <4 x i32>
+ // LLVM-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[V3]] to <4 x i32>
+ // LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[V1]] to <16 x i8>
+ // LLVM-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+ // LLVM-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+ // LLVM-NEXT: [[VBSL_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
+ // LLVM-NEXT: [[VBSL1_I:%.*]] = ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/188449
More information about the cfe-commits
mailing list