[clang] [llvm] [AArch64][NEON] Add intrinsics for LUTI (PR #96883)

via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 1 01:38:52 PDT 2024


================
@@ -0,0 +1,433 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+#include <arm_neon.h>
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_u8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_u8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_u8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_u8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_u8(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2_lane_s8(int8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_s8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_s8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_s8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_s8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_s8(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) {
+  return vluti2_lane_p8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_p8(
+// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8.v16i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
+  return vluti2_laneq_p8(vn, vm, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v8i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 3)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
+  return vluti2q_lane_p8(vn, vm, 3);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8(
+// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 7)
+// CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
+//
+poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
+  return vluti2q_laneq_p8(vn, vm, 7);
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16(
+// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[VN]] to <8 x i8>
+// CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i8.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[VM]], i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE]]
+//
+uint16x8_t test_vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm) {
----------------
CarolineConcatto wrote:

This test is failing when trying to lower:
fatal error: error in backend: Cannot select: intrinsic %llvm.aarch64.neon.vluti2.lane
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: ./bin/clang -target aarch64-linux-gnu -march=armv8-a+lut+bf16+simd -S -o - ../clang/test/CodeGen/aarch64-neon-luti.c
1.	<eof> parser at end of file
2.	Code generation
3.	Running pass 'Function Pass Manager' on module '../clang/test/CodeGen/aarch64-neon-luti.c'.
4.	Running pass 'AArch64 Instruction Selection' on function '@test_vluti2_lane_u16'

https://github.com/llvm/llvm-project/pull/96883


More information about the cfe-commits mailing list