[llvm] [Scalarizer] Ensure valid VectorSplits for each struct element in `visitExtractValueInst` (PR #128538)
Deric Cheung via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 11:04:39 PST 2025
https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/128538
>From f50c0fa6ca7732c08178325d7fce3e006c2d5831 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 17 Feb 2025 16:47:09 +0000
Subject: [PATCH 1/4] [Scalarizer] Test `*_with_overflow` intrinsics with
min-bits
---
llvm/test/Transforms/Scalarizer/min-bits.ll | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll
index 97cc71626e208..377893ad7e6fd 100644
--- a/llvm/test/Transforms/Scalarizer/min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/min-bits.ll
@@ -1081,6 +1081,17 @@ define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
ret <4 x half> %r
}
+define <3 x i32> @call_v3i32(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: @call_v3i32(
+; CHECK-NEXT: [[T:%.*]] = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]])
+; CHECK-NEXT: [[R:%.*]] = extractvalue { <3 x i32>, <3 x i1> } [[T]], 0
+; CHECK-NEXT: ret <3 x i32> [[R]]
+;
+ %t = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> %a, <3 x i32> %b)
+ %r = extractvalue { <3 x i32>, <3 x i1> } %t, 0
+ ret <3 x i32> %r
+}
+
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>)
declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
>From 9b5080c5c6bc0f386e9c84aa64bc8a52a13785f2 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 24 Feb 2025 05:16:49 +0000
Subject: [PATCH 2/4] [Scalarizer] Add check in visitExtractValueInst for
struct elements with differing bitness
---
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 25 ++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 2b27150112ad8..820c8e12d2449 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -719,13 +719,12 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
std::optional<VectorSplit> CurrVS =
getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
- // This case does not seem to happen, but it is possible for
- // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit
- // is not returned and we will bailout of handling this call.
- // The secondary bailout case is if NumPacked does not match.
- // This can happen if ScalarizeMinBits is not set to the default.
- // This means with certain ScalarizeMinBits intrinsics like frexp
- // will only scalarize when the struct elements have the same bitness.
+  // It is possible for VectorSplit.NumPacked >= NumElems. If that happens, a
+  // VectorSplit is not returned and we bail out of handling this call.
+  // The secondary bailout case is when NumPacked does not match, which can
+  // happen if ScalarizeMinBits is not set to the default. This means that,
+  // with certain ScalarizeMinBits values, intrinsics like frexp will only
+  // scalarize when the struct elements have the same bitness.
if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
return false;
if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I, TTI))
@@ -1083,6 +1082,18 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
std::optional<VectorSplit> VS = getVectorSplit(VecType);
if (!VS)
return false;
+ for (unsigned I = 1; I < OpTy->getNumContainedTypes(); I++) {
+ std::optional<VectorSplit> CurrVS =
+ getVectorSplit(cast<FixedVectorType>(OpTy->getContainedType(I)));
+  // It is possible for VectorSplit.NumPacked >= NumElems. If that happens, a
+  // VectorSplit is not returned and we bail out of handling this
+  // extractvalue. The secondary bailout case is when NumPacked does not
+  // match, which can happen if ScalarizeMinBits is not set to the default.
+  // This means that, with certain ScalarizeMinBits values, intrinsics like
+  // frexp will only scalarize when the struct elements have the same bitness.
+ if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
+ return false;
+ }
IRBuilder<> Builder(&EVI);
Scatterer Op0 = scatter(&EVI, Op, *VS);
assert(!EVI.getIndices().empty() && "Make sure an index exists");
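
A note on the case the new loop guards against (a minimal sketch for reference,
not part of the patch; the regression test added in the later patches uses
<2 x half>, and the <3 x ...> types here are only an illustration): with
-passes='scalarizer<min-bits=32>', the two members of frexp's struct result
split differently, so the extractvalue has to be left alone.

  ; <3 x half> is split into fragments of two halves (NumPacked = 2), while
  ; <3 x i32> is split one element at a time (NumPacked = 1). The mismatched
  ; NumPacked values now make visitExtractValueInst bail out.
  %r  = call { <3 x half>, <3 x i32> } @llvm.frexp.v3f16.v3i32(<3 x half> %h)
  %e0 = extractvalue { <3 x half>, <3 x i32> } %r, 0
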
>From f40f03f6fd2b702a590f7066ff6a1923312a6a96 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 24 Feb 2025 18:16:26 +0000
Subject: [PATCH 3/4] Add frexp to min-bits scalarizer test
---
llvm/test/Transforms/Scalarizer/min-bits.ll | 26 +++++++++++++++++++--
1 file changed, 24 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll
index 377893ad7e6fd..c390560f16eaf 100644
--- a/llvm/test/Transforms/Scalarizer/min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/min-bits.ll
@@ -1081,8 +1081,8 @@ define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
ret <4 x half> %r
}
-define <3 x i32> @call_v3i32(<3 x i32> %a, <3 x i32> %b) {
-; CHECK-LABEL: @call_v3i32(
+define <3 x i32> @uadd_with_overflow_v3i32(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: @uadd_with_overflow_v3i32(
; CHECK-NEXT: [[T:%.*]] = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]])
; CHECK-NEXT: [[R:%.*]] = extractvalue { <3 x i32>, <3 x i1> } [[T]], 0
; CHECK-NEXT: ret <3 x i32> [[R]]
@@ -1092,6 +1092,28 @@ define <3 x i32> @call_v3i32(<3 x i32> %a, <3 x i32> %b) {
ret <3 x i32> %r
}
+define noundef <2 x half> @frexp_v2f16(<2 x half> noundef %h) {
+; MIN16-LABEL: @frexp_v2f16(
+; MIN16-NEXT: [[H_I0:%.*]] = extractelement <2 x half> [[H:%.*]], i64 0
+; MIN16-NEXT: [[R_I0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I0]])
+; MIN16-NEXT: [[H_I1:%.*]] = extractelement <2 x half> [[H]], i64 1
+; MIN16-NEXT: [[R_I1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I1]])
+; MIN16-NEXT: [[E0_ELEM0:%.*]] = extractvalue { half, i32 } [[R_I0]], 0
+; MIN16-NEXT: [[E0_ELEM01:%.*]] = extractvalue { half, i32 } [[R_I1]], 0
+; MIN16-NEXT: [[E0_UPTO0:%.*]] = insertelement <2 x half> poison, half [[E0_ELEM0]], i64 0
+; MIN16-NEXT: [[E0:%.*]] = insertelement <2 x half> [[E0_UPTO0]], half [[E0_ELEM01]], i64 1
+; MIN16-NEXT: ret <2 x half> [[E0]]
+;
+; MIN32-LABEL: @frexp_v2f16(
+; MIN32-NEXT: [[R:%.*]] = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> [[H:%.*]])
+; MIN32-NEXT: [[E0:%.*]] = extractvalue { <2 x half>, <2 x i32> } [[R]], 0
+; MIN32-NEXT: ret <2 x half> [[E0]]
+;
+  %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %h)
+ %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0
+ ret <2 x half> %e0
+}
+
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>)
declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
>From 005b51bc634e5ed5af7600d01a461ed75f09233f Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 24 Feb 2025 19:03:49 +0000
Subject: [PATCH 4/4] Add min-bits=64 RUN line to min-bits scalarizer test
---
llvm/test/Transforms/Scalarizer/min-bits.ll | 197 ++++++++++++++++++++
1 file changed, 197 insertions(+)
diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll
index c390560f16eaf..f9e6774ffff64 100644
--- a/llvm/test/Transforms/Scalarizer/min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/min-bits.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -passes='function(scalarizer<load-store;min-bits=16>,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN16
; RUN: opt %s -passes='function(scalarizer<load-store;min-bits=32>,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN32
+; RUN: opt %s -passes='function(scalarizer<load-store;min-bits=64>,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN64
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define void @load_add_store_v2i16(ptr %pa, ptr %pb) {
@@ -23,6 +24,13 @@ define void @load_add_store_v2i16(ptr %pa, ptr %pb) {
; MIN32-NEXT: [[C:%.*]] = add <2 x i16> [[A]], [[B]]
; MIN32-NEXT: store <2 x i16> [[C]], ptr [[PA]], align 8
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @load_add_store_v2i16(
+; MIN64-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 8
+; MIN64-NEXT: [[C:%.*]] = add <2 x i16> [[A]], [[B]]
+; MIN64-NEXT: store <2 x i16> [[C]], ptr [[PA]], align 8
+; MIN64-NEXT: ret void
;
%a = load <2 x i16>, ptr %pa, align 8
%b = load <2 x i16>, ptr %pb, align 8
@@ -63,6 +71,13 @@ define void @load_add_store_v3i16(ptr %pa, ptr %pb) {
; MIN32-NEXT: store <2 x i16> [[C_I0]], ptr [[PA]], align 8
; MIN32-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 4
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @load_add_store_v3i16(
+; MIN64-NEXT: [[A:%.*]] = load <3 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT: [[B:%.*]] = load <3 x i16>, ptr [[PB:%.*]], align 8
+; MIN64-NEXT: [[C:%.*]] = add <3 x i16> [[A]], [[B]]
+; MIN64-NEXT: store <3 x i16> [[C]], ptr [[PA]], align 8
+; MIN64-NEXT: ret void
;
%a = load <3 x i16>, ptr %pa, align 8
%b = load <3 x i16>, ptr %pb, align 8
@@ -109,6 +124,13 @@ define void @load_add_store_v4i16(ptr %pa, ptr %pb) {
; MIN32-NEXT: store <2 x i16> [[C_I0]], ptr [[PA]], align 8
; MIN32-NEXT: store <2 x i16> [[C_I1]], ptr [[PA_I1]], align 4
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @load_add_store_v4i16(
+; MIN64-NEXT: [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT: [[B:%.*]] = load <4 x i16>, ptr [[PB:%.*]], align 8
+; MIN64-NEXT: [[C:%.*]] = add <4 x i16> [[A]], [[B]]
+; MIN64-NEXT: store <4 x i16> [[C]], ptr [[PA]], align 8
+; MIN64-NEXT: ret void
;
%a = load <4 x i16>, ptr %pa, align 8
%b = load <4 x i16>, ptr %pb, align 8
@@ -153,6 +175,13 @@ define void @load_add_store_v4i10(ptr %pa, ptr %pb) {
; MIN32-NEXT: [[C:%.*]] = insertelement <4 x i10> [[TMP1]], i10 [[C_I1]], i64 3
; MIN32-NEXT: store <4 x i10> [[C]], ptr [[PA]], align 8
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @load_add_store_v4i10(
+; MIN64-NEXT: [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT: [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8
+; MIN64-NEXT: [[C:%.*]] = add <4 x i10> [[A]], [[B]]
+; MIN64-NEXT: store <4 x i10> [[C]], ptr [[PA]], align 8
+; MIN64-NEXT: ret void
;
%a = load <4 x i10>, ptr %pa, align 8
%b = load <4 x i10>, ptr %pb, align 8
@@ -176,6 +205,10 @@ define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b,
; MIN32-LABEL: @select_uniform_condition_v2f16(
; MIN32-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]]
; MIN32-NEXT: ret <2 x half> [[R]]
+;
+; MIN64-LABEL: @select_uniform_condition_v2f16(
+; MIN64-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]]
+; MIN64-NEXT: ret <2 x half> [[R]]
;
%r = select i1 %cc, <2 x half> %a, <2 x half> %b
ret <2 x half> %r
@@ -207,6 +240,10 @@ define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b,
; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT: ret <3 x half> [[R]]
+;
+; MIN64-LABEL: @select_uniform_condition_v3f16(
+; MIN64-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <3 x half> [[A:%.*]], <3 x half> [[B:%.*]]
+; MIN64-NEXT: ret <3 x half> [[R]]
;
%r = select i1 %cc, <3 x half> %a, <3 x half> %b
ret <3 x half> %r
@@ -243,6 +280,10 @@ define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b,
; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT: ret <4 x half> [[R]]
+;
+; MIN64-LABEL: @select_uniform_condition_v4f16(
+; MIN64-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <4 x half> [[A:%.*]], <4 x half> [[B:%.*]]
+; MIN64-NEXT: ret <4 x half> [[R]]
;
%r = select i1 %cc, <4 x half> %a, <4 x half> %b
ret <4 x half> %r
@@ -270,6 +311,10 @@ define <2 x half> @unary_v2f16(<2 x half> %a) {
; MIN32-LABEL: @unary_v2f16(
; MIN32-NEXT: [[R:%.*]] = fneg <2 x half> [[A:%.*]]
; MIN32-NEXT: ret <2 x half> [[R]]
+;
+; MIN64-LABEL: @unary_v2f16(
+; MIN64-NEXT: [[R:%.*]] = fneg <2 x half> [[A:%.*]]
+; MIN64-NEXT: ret <2 x half> [[R]]
;
%r = fneg <2 x half> %a
ret <2 x half> %r
@@ -296,6 +341,10 @@ define <3 x half> @unary_v3f16(<3 x half> %a) {
; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT: ret <3 x half> [[R]]
+;
+; MIN64-LABEL: @unary_v3f16(
+; MIN64-NEXT: [[R:%.*]] = fneg <3 x half> [[A:%.*]]
+; MIN64-NEXT: ret <3 x half> [[R]]
;
%r = fneg <3 x half> %a
ret <3 x half> %r
@@ -326,6 +375,10 @@ define <4 x half> @unary_v4f16(<4 x half> %a) {
; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT: ret <4 x half> [[R]]
+;
+; MIN64-LABEL: @unary_v4f16(
+; MIN64-NEXT: [[R:%.*]] = fneg <4 x half> [[A:%.*]]
+; MIN64-NEXT: ret <4 x half> [[R]]
;
%r = fneg <4 x half> %a
ret <4 x half> %r
@@ -346,6 +399,10 @@ define <2 x half> @binary_v2f16(<2 x half> %a, <2 x half> %b) {
; MIN32-LABEL: @binary_v2f16(
; MIN32-NEXT: [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]]
; MIN32-NEXT: ret <2 x half> [[R]]
+;
+; MIN64-LABEL: @binary_v2f16(
+; MIN64-NEXT: [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]]
+; MIN64-NEXT: ret <2 x half> [[R]]
;
%r = fadd <2 x half> %a, %b
ret <2 x half> %r
@@ -377,6 +434,10 @@ define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) {
; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT: ret <3 x half> [[R]]
+;
+; MIN64-LABEL: @binary_v3f16(
+; MIN64-NEXT: [[R:%.*]] = fadd <3 x half> [[A:%.*]], [[B:%.*]]
+; MIN64-NEXT: ret <3 x half> [[R]]
;
%r = fadd <3 x half> %a, %b
ret <3 x half> %r
@@ -413,6 +474,10 @@ define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) {
; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT: ret <4 x half> [[R]]
+;
+; MIN64-LABEL: @binary_v4f16(
+; MIN64-NEXT: [[R:%.*]] = fadd <4 x half> [[A:%.*]], [[B:%.*]]
+; MIN64-NEXT: ret <4 x half> [[R]]
;
%r = fadd <4 x half> %a, %b
ret <4 x half> %r
@@ -431,6 +496,10 @@ define <2 x i16> @fptosi_v2f16(<2 x half> %a) {
; MIN32-LABEL: @fptosi_v2f16(
; MIN32-NEXT: [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16>
; MIN32-NEXT: ret <2 x i16> [[R]]
+;
+; MIN64-LABEL: @fptosi_v2f16(
+; MIN64-NEXT: [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16>
+; MIN64-NEXT: ret <2 x i16> [[R]]
;
%r = fptosi <2 x half> %a to <2 x i16>
ret <2 x i16> %r
@@ -457,6 +526,10 @@ define <3 x i16> @fptosi_v3f16(<3 x half> %a) {
; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[R_I1]], i64 2
; MIN32-NEXT: ret <3 x i16> [[R]]
+;
+; MIN64-LABEL: @fptosi_v3f16(
+; MIN64-NEXT: [[R:%.*]] = fptosi <3 x half> [[A:%.*]] to <3 x i16>
+; MIN64-NEXT: ret <3 x i16> [[R]]
;
%r = fptosi <3 x half> %a to <3 x i16>
ret <3 x i16> %r
@@ -487,6 +560,10 @@ define <4 x i16> @fptosi_v4f16(<4 x half> %a) {
; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT: ret <4 x i16> [[R]]
+;
+; MIN64-LABEL: @fptosi_v4f16(
+; MIN64-NEXT: [[R:%.*]] = fptosi <4 x half> [[A:%.*]] to <4 x i16>
+; MIN64-NEXT: ret <4 x i16> [[R]]
;
%r = fptosi <4 x half> %a to <4 x i16>
ret <4 x i16> %r
@@ -511,6 +588,10 @@ define <4 x float> @fpext_v4f16(<4 x half> %a) {
; MIN32-LABEL: @fpext_v4f16(
; MIN32-NEXT: [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float>
; MIN32-NEXT: ret <4 x float> [[R]]
+;
+; MIN64-LABEL: @fpext_v4f16(
+; MIN64-NEXT: [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float>
+; MIN64-NEXT: ret <4 x float> [[R]]
;
%r = fpext <4 x half> %a to <4 x float>
ret <4 x float> %r
@@ -544,6 +625,10 @@ define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) {
; MIN32-LABEL: @gep1_v4(
; MIN32-NEXT: [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]]
; MIN32-NEXT: ret <4 x ptr> [[P]]
+;
+; MIN64-LABEL: @gep1_v4(
+; MIN64-NEXT: [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]]
+; MIN64-NEXT: ret <4 x ptr> [[P]]
;
%p = getelementptr i32, ptr %base, <4 x i16> %a
ret <4 x ptr> %p
@@ -592,6 +677,10 @@ define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) {
; MIN32-LABEL: @gep3_v4(
; MIN32-NEXT: [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]]
; MIN32-NEXT: ret <4 x ptr> [[P]]
+;
+; MIN64-LABEL: @gep3_v4(
+; MIN64-NEXT: [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]]
+; MIN64-NEXT: ret <4 x ptr> [[P]]
;
%p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a
ret <4 x ptr> %p
@@ -609,6 +698,11 @@ define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) {
; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1
; MIN32-NEXT: store <2 x i16> [[R]], ptr [[P:%.*]], align 4
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @insertelement_v2i16(
+; MIN64-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1
+; MIN64-NEXT: store <2 x i16> [[R]], ptr [[P:%.*]], align 4
+; MIN64-NEXT: ret void
;
%r = insertelement <2 x i16> %a, i16 %b, i64 1
store <2 x i16> %r, ptr %p
@@ -632,6 +726,11 @@ define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) {
; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8
; MIN32-NEXT: store i16 [[B:%.*]], ptr [[P_I1]], align 4
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @insertelement_v3i16(
+; MIN64-NEXT: [[R:%.*]] = insertelement <3 x i16> [[A:%.*]], i16 [[B:%.*]], i64 2
+; MIN64-NEXT: store <3 x i16> [[R]], ptr [[P:%.*]], align 8
+; MIN64-NEXT: ret void
;
%r = insertelement <3 x i16> %a, i16 %b, i64 2
store <3 x i16> %r, ptr %p
@@ -660,6 +759,11 @@ define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) {
; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8
; MIN32-NEXT: store <2 x i16> [[TMP1]], ptr [[P_I1]], align 4
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @insertelement_v4i16(
+; MIN64-NEXT: [[R:%.*]] = insertelement <4 x i16> [[A:%.*]], i16 [[B:%.*]], i64 3
+; MIN64-NEXT: store <4 x i16> [[R]], ptr [[P:%.*]], align 8
+; MIN64-NEXT: ret void
;
%r = insertelement <4 x i16> %a, i16 %b, i64 3
store <4 x i16> %r, ptr %p
@@ -677,6 +781,11 @@ define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) {
; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4
; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1
; MIN32-NEXT: ret <2 x i16> [[R]]
+;
+; MIN64-LABEL: @load_insertelement_v2i16(
+; MIN64-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4
+; MIN64-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1
+; MIN64-NEXT: ret <2 x i16> [[R]]
;
%a = load <2 x i16>, ptr %pa
%r = insertelement <2 x i16> %a, i16 %b, i64 1
@@ -698,6 +807,11 @@ define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) {
; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[B:%.*]], i64 2
; MIN32-NEXT: ret <3 x i16> [[R]]
+;
+; MIN64-LABEL: @load_insertelement_v3i16(
+; MIN64-NEXT: [[A:%.*]] = load <3 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT: [[R:%.*]] = insertelement <3 x i16> [[A]], i16 [[B:%.*]], i64 2
+; MIN64-NEXT: ret <3 x i16> [[R]]
;
%a = load <3 x i16>, ptr %pa
%r = insertelement <3 x i16> %a, i16 %b, i64 2
@@ -726,6 +840,11 @@ define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) {
; MIN32-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT: ret <4 x i16> [[R]]
+;
+; MIN64-LABEL: @load_insertelement_v4i16(
+; MIN64-NEXT: [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT: [[R:%.*]] = insertelement <4 x i16> [[A]], i16 [[B:%.*]], i64 3
+; MIN64-NEXT: ret <4 x i16> [[R]]
;
%a = load <4 x i16>, ptr %pa
%r = insertelement <4 x i16> %a, i16 %b, i64 3
@@ -758,6 +877,13 @@ define void @shufflevector_grow(ptr %pa, ptr %pb) {
; MIN32-NEXT: [[R_I1:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; MIN32-NEXT: store <2 x i16> [[R_I1]], ptr [[PA_I1]], align 4
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @shufflevector_grow(
+; MIN64-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4
+; MIN64-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 4
+; MIN64-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[A]], <2 x i16> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; MIN64-NEXT: store <4 x i16> [[R]], ptr [[PA]], align 8
+; MIN64-NEXT: ret void
;
%a = load <2 x i16>, ptr %pa
%b = load <2 x i16>, ptr %pb
@@ -786,6 +912,12 @@ define void @shufflevector_shrink(ptr %pa) {
; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
; MIN32-NEXT: store <2 x i16> [[R]], ptr [[PA]], align 4
; MIN32-NEXT: ret void
+;
+; MIN64-LABEL: @shufflevector_shrink(
+; MIN64-NEXT: [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; MIN64-NEXT: store <2 x i16> [[R]], ptr [[PA]], align 4
+; MIN64-NEXT: ret void
;
%a = load <4 x i16>, ptr %pa
%r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
@@ -832,6 +964,22 @@ define void @phi_v2f16(ptr %base, i64 %bound) {
; MIN32-NEXT: store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4
; MIN32-NEXT: ret void
;
+; MIN64-LABEL: @phi_v2f16(
+; MIN64-NEXT: entry:
+; MIN64-NEXT: br label [[LOOP:%.*]]
+; MIN64: loop:
+; MIN64-NEXT: [[X:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i64 [[IDX]]
+; MIN64-NEXT: [[A:%.*]] = load <2 x half>, ptr [[P]], align 2
+; MIN64-NEXT: [[X_NEXT]] = fadd <2 x half> [[X]], [[A]]
+; MIN64-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
+; MIN64-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; MIN64-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; MIN64: end:
+; MIN64-NEXT: store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4
+; MIN64-NEXT: ret void
+;
entry:
br label %loop
@@ -901,6 +1049,22 @@ define void @phi_v3f16(ptr %base, i64 %bound) {
; MIN32-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 4
; MIN32-NEXT: ret void
;
+; MIN64-LABEL: @phi_v3f16(
+; MIN64-NEXT: entry:
+; MIN64-NEXT: br label [[LOOP:%.*]]
+; MIN64: loop:
+; MIN64-NEXT: [[X:%.*]] = phi <3 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE:%.*]], i64 [[IDX]]
+; MIN64-NEXT: [[A:%.*]] = load <3 x half>, ptr [[P]], align 2
+; MIN64-NEXT: [[X_NEXT]] = fadd <3 x half> [[X]], [[A]]
+; MIN64-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
+; MIN64-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; MIN64-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; MIN64: end:
+; MIN64-NEXT: store <3 x half> [[X_NEXT]], ptr [[BASE]], align 8
+; MIN64-NEXT: ret void
+;
entry:
br label %loop
@@ -976,6 +1140,22 @@ define void @phi_v4f16(ptr %base, i64 %bound) {
; MIN32-NEXT: store <2 x half> [[X_NEXT_I1]], ptr [[BASE_I1]], align 4
; MIN32-NEXT: ret void
;
+; MIN64-LABEL: @phi_v4f16(
+; MIN64-NEXT: entry:
+; MIN64-NEXT: br label [[LOOP:%.*]]
+; MIN64: loop:
+; MIN64-NEXT: [[X:%.*]] = phi <4 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE:%.*]], i64 [[IDX]]
+; MIN64-NEXT: [[A:%.*]] = load <4 x half>, ptr [[P]], align 2
+; MIN64-NEXT: [[X_NEXT]] = fadd <4 x half> [[X]], [[A]]
+; MIN64-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
+; MIN64-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; MIN64-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; MIN64: end:
+; MIN64-NEXT: store <4 x half> [[X_NEXT]], ptr [[BASE]], align 8
+; MIN64-NEXT: ret void
+;
entry:
br label %loop
@@ -1009,6 +1189,10 @@ define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) {
; MIN32-LABEL: @call_v2f16(
; MIN32-NEXT: [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]])
; MIN32-NEXT: ret <2 x half> [[R]]
+;
+; MIN64-LABEL: @call_v2f16(
+; MIN64-NEXT: [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]])
+; MIN64-NEXT: ret <2 x half> [[R]]
;
%r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %r
@@ -1040,6 +1224,10 @@ define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) {
; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
; MIN32-NEXT: ret <3 x half> [[R]]
+;
+; MIN64-LABEL: @call_v3f16(
+; MIN64-NEXT: [[R:%.*]] = call <3 x half> @llvm.minnum.v3f16(<3 x half> [[A:%.*]], <3 x half> [[B:%.*]])
+; MIN64-NEXT: ret <3 x half> [[R]]
;
%r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b)
ret <3 x half> %r
@@ -1076,6 +1264,10 @@ define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; MIN32-NEXT: ret <4 x half> [[R]]
+;
+; MIN64-LABEL: @call_v4f16(
+; MIN64-NEXT: [[R:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]])
+; MIN64-NEXT: ret <4 x half> [[R]]
;
%r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %r
@@ -1108,6 +1300,11 @@ define noundef <2 x half> @frexp_v2f16(<2 x half> noundef %h) {
; MIN32-NEXT: [[R:%.*]] = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> [[H:%.*]])
; MIN32-NEXT: [[E0:%.*]] = extractvalue { <2 x half>, <2 x i32> } [[R]], 0
; MIN32-NEXT: ret <2 x half> [[E0]]
+;
+; MIN64-LABEL: @frexp_v2f16(
+; MIN64-NEXT: [[R:%.*]] = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> [[H:%.*]])
+; MIN64-NEXT: [[E0:%.*]] = extractvalue { <2 x half>, <2 x i32> } [[R]], 0
+; MIN64-NEXT: ret <2 x half> [[E0]]
;
  %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %h)
%e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0