[clang] [llvm] [AArch64][NEON] Add intrinsics for LUTI (PR #96883)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 28 04:54:23 PDT 2024
================
@@ -6420,6 +6420,76 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
let Predicates = [HasLUT] in {
defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
+
+ // Selection patterns for int_aarch64_neon_vluti2_lane with a v16i8 result,
+ // selected to LUT2_B. There is one pattern per combination of 64-bit (V64)
+ // and 128-bit (V128) $Rn / $Rm. Each 64-bit operand is inserted into the
+ // dsub subregister of an IMPLICIT_DEF before being handed to LUT2_B.
+ def : Pat<(v16i8 (int_aarch64_neon_vluti2_lane (v8i8 V64:$Rn),
+ (v8i8 V64:$Rm), (i32 VectorIndexS32b_timm:$idx))),
+ (LUT2_B (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub), VectorIndexS32b_timm:$idx)>;
+ def : Pat<(v16i8 (int_aarch64_neon_vluti2_lane (v8i8 V64:$Rn),
+ (v16i8 V128:$Rm), (i32 VectorIndexS32b_timm:$idx))),
+ (LUT2_B (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), V128:$Rm, VectorIndexS32b_timm:$idx)>;
+ def : Pat<(v16i8 (int_aarch64_neon_vluti2_lane (v16i8 V128:$Rn),
+ (v8i8 V64:$Rm), (i32 VectorIndexS32b_timm:$idx))),
+ (LUT2_B V128:$Rn, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub), VectorIndexS32b_timm:$idx)>;
+ def : Pat<(v16i8 (int_aarch64_neon_vluti2_lane (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm), (i32 VectorIndexS32b_timm:$idx))),
+ (LUT2_B V128:$Rn, V128:$Rm, VectorIndexS32b_timm:$idx)>;
+ // Same intrinsic with v8i16 / v8f16 results, selected to LUT2_H and indexed
+ // with VectorIndexH32b_timm. Each i16 pattern is followed by an f16 twin
+ // that differs only in the element type.
+ def : Pat<(v8i16 (int_aarch64_neon_vluti2_lane (v4i16 V64:$Rn),
+ (v8i8 V64:$Rm), (i32 VectorIndexH32b_timm:$idx))),
+ (LUT2_H (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub), VectorIndexH32b_timm:$idx)>;
+ def : Pat<(v8f16 (int_aarch64_neon_vluti2_lane (v4f16 V64:$Rn),
+ (v8i8 V64:$Rm), (i32 VectorIndexH32b_timm:$idx))),
+ (LUT2_H (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub), VectorIndexH32b_timm:$idx)>;
+ def : Pat<(v8i16 (int_aarch64_neon_vluti2_lane (v4i16 V64:$Rn),
+ (v16i8 V128:$Rm), (i32 VectorIndexH32b_timm:$idx))),
+ (LUT2_H (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), V128:$Rm, VectorIndexH32b_timm:$idx)>;
+ def : Pat<(v8f16 (int_aarch64_neon_vluti2_lane (v4f16 V64:$Rn),
+ (v16i8 V128:$Rm), (i32 VectorIndexH32b_timm:$idx))),
+ (LUT2_H (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), V128:$Rm, VectorIndexH32b_timm:$idx)>;
+ def : Pat<(v8i16 (int_aarch64_neon_vluti2_lane (v8i16 V128:$Rn),
+ (v8i8 V64:$Rm), (i32 VectorIndexH32b_timm:$idx))),
+ (LUT2_H V128:$Rn, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub), VectorIndexH32b_timm:$idx)>;
+ def : Pat<(v8f16 (int_aarch64_neon_vluti2_lane (v8f16 V128:$Rn),
+ (v8i8 V64:$Rm), (i32 VectorIndexH32b_timm:$idx))),
+ (LUT2_H V128:$Rn, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub), VectorIndexH32b_timm:$idx)>;
+ // NOTE(review): the next two patterns spell $Rn as VecListOne8h, whereas the
+ // sibling 128-bit patterns above use V128 -- confirm this is intentional.
+ def : Pat<(v8i16 (int_aarch64_neon_vluti2_lane (v8i16 VecListOne8h:$Rn),
+ (v16i8 V128:$Rm), (i32 VectorIndexH32b_timm:$idx))),
+ (LUT2_H VecListOne8h:$Rn, V128:$Rm, VectorIndexH32b_timm:$idx)>;
+ def : Pat<(v8f16 (int_aarch64_neon_vluti2_lane (v8f16 VecListOne8h:$Rn),
+ (v16i8 V128:$Rm), (i32 VectorIndexH32b_timm:$idx))),
+ (LUT2_H VecListOne8h:$Rn, V128:$Rm, VectorIndexH32b_timm:$idx)>;
+
+ // int_aarch64_neon_vluti4q_laneq on v16i8 is selected to LUT4_B with the
+ // operands passed through unchanged.
+ def : Pat<(v16i8 (int_aarch64_neon_vluti4q_laneq (v16i8 VecListOne16b:$Rn),
+ (v16i8 V128:$Rm), (i32 VectorIndexD32b_timm:$idx))),
+ (LUT4_B VecListOne16b:$Rn, V128:$Rm, VectorIndexD32b_timm:$idx)>;
+
+ // The _x2 form takes two vectors ($Rn1, $Rn2). REG_SEQUENCE combines them
+ // into a single QQ operand ($Rn1 at qsub0, $Rn2 at qsub1) for LUT4_H.
+ // The i16 and f16 patterns are otherwise identical.
+ def : Pat<(v8i16 (int_aarch64_neon_vluti4q_laneq_x2 (v8i16 VecListOne8h:$Rn1),
+ (v8i16 VecListOne8h:$Rn2), (v16i8 V128:$Rm),
+ (i32 VectorIndexS32b_timm:$idx))),
+ (LUT4_H (REG_SEQUENCE QQ, VecListOne8h:$Rn1, qsub0, VecListOne8h:$Rn2, qsub1), V128:$Rm, VectorIndexS32b_timm:$idx)>;
+ def : Pat<(v8f16 (int_aarch64_neon_vluti4q_laneq_x2 (v8f16 VecListOne8h:$Rn1),
+ (v8f16 VecListOne8h:$Rn2), (v16i8 V128:$Rm),
+ (i32 VectorIndexS32b_timm:$idx))),
+ (LUT4_H (REG_SEQUENCE QQ, VecListOne8h:$Rn1, qsub0, VecListOne8h:$Rn2, qsub1), V128:$Rm, VectorIndexS32b_timm:$idx)>;
+}
+
+let Predicates = [HasLUT, HasBF16] in {
----------------
Lukacma wrote:
Thank you for the suggestion. I have, hopefully, cleaned up the patterns now.
https://github.com/llvm/llvm-project/pull/96883
More information about the cfe-commits
mailing list