[clang] [Clang][LoongArch] Comply with the lp64d ABI to pass vector arguments (PR #74990)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Dec 10 05:06:26 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: None (yjijd)
<details>
<summary>Changes</summary>
LoongArch gcc complies with the lp64d calling convention to pass vector arguments currently. This patch makes clang be consistent with gcc.
---
Patch is 1.89 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74990.diff
5 Files Affected:
- (modified) clang/lib/CodeGen/Targets/LoongArch.cpp (-7)
- (modified) clang/test/CodeGen/LoongArch/lasx/builtin-alias.c (+3416-1460)
- (modified) clang/test/CodeGen/LoongArch/lasx/builtin.c (+3415-1459)
- (modified) clang/test/CodeGen/LoongArch/lsx/builtin-alias.c (+3327-1419)
- (modified) clang/test/CodeGen/LoongArch/lsx/builtin.c (+3327-1419)
``````````diff
diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp
index 7b2c31139b0b2..63b9a1fdb988c 100644
--- a/clang/lib/CodeGen/Targets/LoongArch.cpp
+++ b/clang/lib/CodeGen/Targets/LoongArch.cpp
@@ -324,13 +324,6 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
return ABIArgInfo::getDirect();
}
- // Pass 128-bit/256-bit vector values via vector registers directly.
- if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) &&
- (getTarget().hasFeature("lsx"))) ||
- ((getContext().getTypeSize(Ty) == 256) &&
- getTarget().hasFeature("lasx"))))
- return ABIArgInfo::getDirect();
-
// Complex types for the *f or *d ABI must be passed directly rather than
// using CoerceAndExpand.
if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) {
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
index 09b2d5fcacf53..9a8ce224bcfd0 100644
--- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c
@@ -5,4426 +5,6382 @@
// CHECK-LABEL: @xvsll_b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); }
// CHECK-LABEL: @xvsll_h(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); }
// CHECK-LABEL: @xvsll_w(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); }
// CHECK-LABEL: @xvsll_d(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); }
// CHECK-LABEL: @xvslli_b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); }
// CHECK-LABEL: @xvslli_h(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); }
// CHECK-LABEL: @xvslli_w(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); }
// CHECK-LABEL: @xvslli_d(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); }
// CHECK-LABEL: @xvsra_b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); }
// CHECK-LABEL: @xvsra_h(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); }
// CHECK-LABEL: @xvsra_w(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); }
// CHECK-LABEL: @xvsra_d(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); }
// CHECK-LABEL: @xvsrai_b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); }
// CHECK-LABEL: @xvsrai_h(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); }
// CHECK-LABEL: @xvsrai_w(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); }
// CHECK-LABEL: @xvsrai_d(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); }
// CHECK-LABEL: @xvsrar_b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); }
// CHECK-LABEL: @xvsrar_h(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); }
// CHECK-LABEL: @xvsrar_w(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); }
// CHECK-LABEL: @xvsrar_d(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); }
// CHECK-LABEL: @xvsrari_b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); }
// CHECK-LABEL: @xvsrari_h(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); }
// CHECK-LABEL: @xvsrari_w(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); }
// CHECK-LABEL: @xvsrari_d(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); }
// CHECK-LABEL: @xvsrl_b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
-// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); }
// CHECK-LABEL: @xvsrl_h(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
-// CHECK-NEXT: ret <16 x i16> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); }
// CHECK-LABEL: @xvsrl_w(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
-// CHECK-NEXT: ret <8 x i32> [[TMP0]]
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); }
// CHECK-LABEL: @xvsrl_d(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
-// CHECK-NEXT: ret <4 x i64> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
//
v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); }
// CHECK-LABEL: @xvsrli_b(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1:%.*]], i32 1)
-// CHECK-NEXT: ret <32 x i8> [[TMP0]]
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.lo...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/74990
More information about the cfe-commits
mailing list