[clang] [CIR][AArch64] Support BF16/FP16 NEON types and lower vdup lane builtins (PR #187460)

Mon Mar 23 04:29:46 PDT 2026

================
@@ -34,3 +34,50 @@ bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) {
   // LLVM: ret bfloat [[VGETQ_LANE]]
   return vduph_laneq_bf16(v, 7);
 }
+
+// ALL-LABEL: @test_vdup_lane_bf16(
+bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !u16i>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<4 x !u16i>
+  // LLVM: shufflevector <4 x {{.*}}> {{.*}}, <4 x {{.*}}> {{.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  return vdup_lane_bf16(v, 1);
+}
+
+// ALL-LABEL: @test_vdupq_lane_bf16(
+bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !u16i>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<8 x !u16i>
+  // LLVM: shufflevector <4 x {{.*}}> {{.*}}, <4 x {{.*}}> {{.*}}, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  return vdupq_lane_bf16(v, 1);
+}
+
+// ALL-LABEL: @test_vdup_laneq_bf16(
+bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !u16i>) [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<4 x !u16i>
+  // LLVM: shufflevector <8 x {{.*}}> {{.*}}, <8 x {{.*}}> {{.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  return vdup_laneq_bf16(v, 7);
+}
+
+// ALL-LABEL: @test_vdupq_laneq_bf16(
+bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) {
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !u16i>) [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<8 x !u16i>
+  // LLVM: shufflevector <8 x {{.*}}> {{.*}}, <8 x {{.*}}> {{.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  return vdupq_laneq_bf16(v, 7);
+}
+
+// ALL-LABEL: @test_vdup_n_bf16(
+bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) {
+  // CIR: cir.call @vdup_n_bf16
+  // LLVM: insertelement <4 x bfloat> poison, bfloat %{{.*}}, i{{32|64}} 0
+  // LLVM: insertelement <4 x bfloat> %{{.*}}, bfloat %{{.*}}, i{{32|64}} 3
+  // LLVM: ret <4 x bfloat>
+  return vdup_n_bf16(v);
+}
+
+// ALL-LABEL: @test_vdupq_n_bf16(
+bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) {
+  // CIR: cir.call @vdupq_n_bf16
+  // LLVM: insertelement <8 x bfloat> poison, bfloat %{{.*}}, i{{32|64}} 0
+  // LLVM: insertelement <8 x bfloat> %{{.*}}, bfloat %{{.*}}, i{{32|64}} 7
+  // LLVM: ret <8 x bfloat>
----------------
banach-space wrote:

These LLVM `CHECK` lines differ slightly from the original `CHECK` lines in bf16-getset-intrinsics.c. Could you update this and the other tests so that the changes to the LLVM `CHECK` lines are minimal?

https://github.com/llvm/llvm-project/pull/187460