[clang] 3b09e53 - [ARM] Remove duplicate fp16 intrinsics
David Green via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 28 06:26:22 PDT 2022
Author: David Green
Date: 2022-07-28T14:26:17+01:00
New Revision: 3b09e532ee396bb07820ecadb29e1ed88f6e6c25
URL: https://github.com/llvm/llvm-project/commit/3b09e532ee396bb07820ecadb29e1ed88f6e6c25
DIFF: https://github.com/llvm/llvm-project/commit/3b09e532ee396bb07820ecadb29e1ed88f6e6c25.diff
LOG: [ARM] Remove duplicate fp16 intrinsics
These vdup and vmov float16 intrinsics were defined twice: once in the
general section and again in the fp16 section under a !aarch64 guard. The
vdup_lane intrinsics were defined in both the aarch64 and !aarch64
sections, so they have been merged into a single common definition.
Because the intrinsics are defined as macros, the duplicates did not
produce any warnings; removing them should not alter the set of
available intrinsics.
Added:
Modified:
clang/include/clang/Basic/arm_neon.td
clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 2e9798129fdfb..93f9961931370 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -530,7 +530,7 @@ def VMOV_N : WOpInst<"vmov_n", ".1",
}
let InstName = "" in
def VDUP_LANE: WOpInst<"vdup_lane", ".qI",
- "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
+ "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
OP_DUP_LN>;
////////////////////////////////////////////////////////////////////////////////
@@ -980,7 +980,7 @@ def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
////////////////////////////////////////////////////////////////////////////////
// Set all lanes to same value
-def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "hdQhQdPlQPl", OP_DUP_LN>;
+def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>;
def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
"csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
OP_DUP_LN> {
@@ -1644,7 +1644,8 @@ def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
let isLaneQ = 1;
}
-}
+
+} // ArchGuard = "defined(__aarch64__)"
// ARMv8.2-A FP16 vector intrinsics for A32/A64.
let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
@@ -1763,15 +1764,6 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
def VUZPH : WInst<"vuzp", "2..", "hQh">;
def VTRNH : WInst<"vtrn", "2..", "hQh">;
-
- let ArchGuard = "!defined(__aarch64__)" in {
- // Set all lanes to same value.
- // Already implemented prior to ARMv8.2-A.
- def VMOV_NH : WOpInst<"vmov_n", ".1", "hQh", OP_DUP>;
- def VDUP_NH : WOpInst<"vdup_n", ".1", "hQh", OP_DUP>;
- def VDUP_LANE1H : WOpInst<"vdup_lane", ".qI", "hQh", OP_DUP_LN>;
- }
-
// Vector Extract
def VEXTH : WInst<"vext", "...I", "hQh">;
diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
index 08e7fecd1330f..3dc3a49a9bfd5 100644
--- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -1754,15 +1754,15 @@ float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulh_lane_f16
// CHECK-SAME: (half noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_851:%.*]] = alloca <4 x half>, align 8
-// CHECK-NEXT: [[__REINT1_851:%.*]] = alloca i16, align 2
+// CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8
+// CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[CONV:%.*]] = fpext half [[A]] to float
-// CHECK-NEXT: store <4 x half> [[B]], <4 x half>* [[__REINT_851]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_851]] to <4 x i16>*
+// CHECK-NEXT: store <4 x half> [[B]], <4 x half>* [[__REINT_847]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_847]] to <4 x i16>*
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
-// CHECK-NEXT: store i16 [[VGET_LANE]], i16* [[__REINT1_851]], align 2
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[__REINT1_851]] to half*
+// CHECK-NEXT: store i16 [[VGET_LANE]], i16* [[__REINT1_847]], align 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[__REINT1_847]] to half*
// CHECK-NEXT: [[TMP3:%.*]] = load half, half* [[TMP2]], align 2
// CHECK-NEXT: [[CONV2:%.*]] = fpext half [[TMP3]] to float
// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[CONV2]]
@@ -1776,15 +1776,15 @@ float16_t test_vmulh_lane_f16(float16_t a, float16x4_t b) {
// CHECK-LABEL: define {{[^@]+}}@test_vmulh_laneq_f16
// CHECK-SAME: (half noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[__REINT_854:%.*]] = alloca <8 x half>, align 16
-// CHECK-NEXT: [[__REINT1_854:%.*]] = alloca i16, align 2
+// CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[CONV:%.*]] = fpext half [[A]] to float
-// CHECK-NEXT: store <8 x half> [[B]], <8 x half>* [[__REINT_854]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_854]] to <8 x i16>*
+// CHECK-NEXT: store <8 x half> [[B]], <8 x half>* [[__REINT_850]], align 16
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_850]] to <8 x i16>*
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
-// CHECK-NEXT: store i16 [[VGETQ_LANE]], i16* [[__REINT1_854]], align 2
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[__REINT1_854]] to half*
+// CHECK-NEXT: store i16 [[VGETQ_LANE]], i16* [[__REINT1_850]], align 2
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[__REINT1_850]] to half*
// CHECK-NEXT: [[TMP3:%.*]] = load half, half* [[TMP2]], align 2
// CHECK-NEXT: [[CONV2:%.*]] = fpext half [[TMP3]] to float
// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[CONV2]]
@@ -2281,6 +2281,30 @@ float16x8_t test_vdupq_lane_f16(float16x4_t a) {
return vdupq_lane_f16(a, 3);
}
+// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT: ret <4 x half> [[LANE]]
+//
+float16x4_t test_vdup_laneq_f16(float16x8_t a) {
+ return vdup_laneq_f16(a, 1);
+}
+
+// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT: ret <8 x half> [[LANE]]
+//
+float16x8_t test_vdupq_laneq_f16(float16x8_t a) {
+ return vdupq_laneq_f16(a, 7);
+}
+
// CHECK-LABEL: define {{[^@]+}}@test_vext_f16
// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
More information about the cfe-commits mailing list.