[llvm] [Scalarizer] Ensure valid VectorSplits for each struct element in `visitExtractValueInst` (PR #128538)

Deric Cheung via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 11:04:39 PST 2025


https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/128538

>From f50c0fa6ca7732c08178325d7fce3e006c2d5831 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 17 Feb 2025 16:47:09 +0000
Subject: [PATCH 1/4] [Scalarizer] Test `*_with_overflow` intrinsics with
 min-bits

---
 llvm/test/Transforms/Scalarizer/min-bits.ll | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll
index 97cc71626e208..377893ad7e6fd 100644
--- a/llvm/test/Transforms/Scalarizer/min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/min-bits.ll
@@ -1081,6 +1081,17 @@ define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
   ret <4 x half> %r
 }
 
+define <3 x i32> @call_v3i32(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: @call_v3i32(
+; CHECK-NEXT:    [[T:%.*]] = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = extractvalue { <3 x i32>, <3 x i1> } [[T]], 0
+; CHECK-NEXT:    ret <3 x i32> [[R]]
+;
+  %t = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> %a, <3 x i32> %b)
+  %r = extractvalue { <3 x i32>, <3 x i1> } %t, 0
+  ret <3 x i32> %r
+}
+
 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
 declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>)
 declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)

>From 9b5080c5c6bc0f386e9c84aa64bc8a52a13785f2 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 24 Feb 2025 05:16:49 +0000
Subject: [PATCH 2/4] [Scalarizer] Add check in visitExtractValueInst for
 struct elements with differing bitness

---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 25 ++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 2b27150112ad8..820c8e12d2449 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -719,13 +719,12 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
     for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
       std::optional<VectorSplit> CurrVS =
           getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
-      // This case does not seem to happen, but it is possible for
-      // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit
-      // is not returned and we will bailout of handling this call.
-      // The secondary bailout case is if NumPacked does not match.
-      // This can happen if ScalarizeMinBits is not set to the default.
-      // This means with certain ScalarizeMinBits intrinsics like frexp
-      // will only scalarize when the struct elements have the same bitness.
+      // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a
+      // VectorSplit is not returned and we will bail out of handling this call.
+      // The secondary bailout case is if NumPacked does not match. This can
+      // happen if ScalarizeMinBits is not set to the default. This means that,
+      // with certain ScalarizeMinBits settings, intrinsics like frexp will only
+      // scalarize when the struct elements have the same bitness.
       if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
         return false;
       if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I, TTI))
@@ -1083,6 +1082,18 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
   std::optional<VectorSplit> VS = getVectorSplit(VecType);
   if (!VS)
     return false;
+  for (unsigned I = 1; I < OpTy->getNumContainedTypes(); I++) {
+    std::optional<VectorSplit> CurrVS =
+        getVectorSplit(cast<FixedVectorType>(OpTy->getContainedType(I)));
+    // It is possible for VectorSplit.NumPacked >= NumElems. If that happens a
+    // VectorSplit is not returned and we will bail out of handling this
+    // extractvalue. The secondary bailout case is if NumPacked does not match.
+    // This can happen if ScalarizeMinBits is not set to the default. This means
+    // that, with certain ScalarizeMinBits settings, intrinsics like frexp will
+    // only scalarize when the struct elements have the same bitness.
+    if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
+      return false;
+  }
   IRBuilder<> Builder(&EVI);
   Scatterer Op0 = scatter(&EVI, Op, *VS);
   assert(!EVI.getIndices().empty() && "Make sure an index exists");

>From f40f03f6fd2b702a590f7066ff6a1923312a6a96 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 24 Feb 2025 18:16:26 +0000
Subject: [PATCH 3/4] Add frexp to min-bits scalarizer test

---
 llvm/test/Transforms/Scalarizer/min-bits.ll | 26 +++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll
index 377893ad7e6fd..c390560f16eaf 100644
--- a/llvm/test/Transforms/Scalarizer/min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/min-bits.ll
@@ -1081,8 +1081,8 @@ define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
   ret <4 x half> %r
 }
 
-define <3 x i32> @call_v3i32(<3 x i32> %a, <3 x i32> %b) {
-; CHECK-LABEL: @call_v3i32(
+define <3 x i32> @uadd_with_overflow_v3i32(<3 x i32> %a, <3 x i32> %b) {
+; CHECK-LABEL: @uadd_with_overflow_v3i32(
 ; CHECK-NEXT:    [[T:%.*]] = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]])
 ; CHECK-NEXT:    [[R:%.*]] = extractvalue { <3 x i32>, <3 x i1> } [[T]], 0
 ; CHECK-NEXT:    ret <3 x i32> [[R]]
@@ -1092,6 +1092,28 @@ define <3 x i32> @call_v3i32(<3 x i32> %a, <3 x i32> %b) {
   ret <3 x i32> %r
 }
 
+define noundef <2 x half> @frexp_v2f16(<2 x half> noundef %h) {
+; MIN16-LABEL: @frexp_v2f16(
+; MIN16-NEXT:    [[H_I0:%.*]] = extractelement <2 x half> [[H:%.*]], i64 0
+; MIN16-NEXT:    [[R_I0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I0]])
+; MIN16-NEXT:    [[H_I1:%.*]] = extractelement <2 x half> [[H]], i64 1
+; MIN16-NEXT:    [[R_I1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I1]])
+; MIN16-NEXT:    [[E0_ELEM0:%.*]] = extractvalue { half, i32 } [[R_I0]], 0
+; MIN16-NEXT:    [[E0_ELEM01:%.*]] = extractvalue { half, i32 } [[R_I1]], 0
+; MIN16-NEXT:    [[E0_UPTO0:%.*]] = insertelement <2 x half> poison, half [[E0_ELEM0]], i64 0
+; MIN16-NEXT:    [[E0:%.*]] = insertelement <2 x half> [[E0_UPTO0]], half [[E0_ELEM01]], i64 1
+; MIN16-NEXT:    ret <2 x half> [[E0]]
+;
+; MIN32-LABEL: @frexp_v2f16(
+; MIN32-NEXT:    [[R:%.*]] = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> [[H:%.*]])
+; MIN32-NEXT:    [[E0:%.*]] = extractvalue { <2 x half>, <2 x i32> } [[R]], 0
+; MIN32-NEXT:    ret <2 x half> [[E0]]
+;
+  %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %h)
+  %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0
+  ret <2 x half> %e0
+}
+
 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
 declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>)
 declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)

>From 005b51bc634e5ed5af7600d01a461ed75f09233f Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 24 Feb 2025 19:03:49 +0000
Subject: [PATCH 4/4] Add frexp to min-bits scalarizer test

---
 llvm/test/Transforms/Scalarizer/min-bits.ll | 197 ++++++++++++++++++++
 1 file changed, 197 insertions(+)

diff --git a/llvm/test/Transforms/Scalarizer/min-bits.ll b/llvm/test/Transforms/Scalarizer/min-bits.ll
index c390560f16eaf..f9e6774ffff64 100644
--- a/llvm/test/Transforms/Scalarizer/min-bits.ll
+++ b/llvm/test/Transforms/Scalarizer/min-bits.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt %s -passes='function(scalarizer<load-store;min-bits=16>,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN16
 ; RUN: opt %s -passes='function(scalarizer<load-store;min-bits=32>,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN32
+; RUN: opt %s -passes='function(scalarizer<load-store;min-bits=64>,dce)' -S | FileCheck %s --check-prefixes=CHECK,MIN64
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
 define void @load_add_store_v2i16(ptr %pa, ptr %pb) {
@@ -23,6 +24,13 @@ define void @load_add_store_v2i16(ptr %pa, ptr %pb) {
 ; MIN32-NEXT:    [[C:%.*]] = add <2 x i16> [[A]], [[B]]
 ; MIN32-NEXT:    store <2 x i16> [[C]], ptr [[PA]], align 8
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @load_add_store_v2i16(
+; MIN64-NEXT:    [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT:    [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 8
+; MIN64-NEXT:    [[C:%.*]] = add <2 x i16> [[A]], [[B]]
+; MIN64-NEXT:    store <2 x i16> [[C]], ptr [[PA]], align 8
+; MIN64-NEXT:    ret void
 ;
   %a = load <2 x i16>, ptr %pa, align 8
   %b = load <2 x i16>, ptr %pb, align 8
@@ -63,6 +71,13 @@ define void @load_add_store_v3i16(ptr %pa, ptr %pb) {
 ; MIN32-NEXT:    store <2 x i16> [[C_I0]], ptr [[PA]], align 8
 ; MIN32-NEXT:    store i16 [[C_I1]], ptr [[PA_I1]], align 4
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @load_add_store_v3i16(
+; MIN64-NEXT:    [[A:%.*]] = load <3 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT:    [[B:%.*]] = load <3 x i16>, ptr [[PB:%.*]], align 8
+; MIN64-NEXT:    [[C:%.*]] = add <3 x i16> [[A]], [[B]]
+; MIN64-NEXT:    store <3 x i16> [[C]], ptr [[PA]], align 8
+; MIN64-NEXT:    ret void
 ;
   %a = load <3 x i16>, ptr %pa, align 8
   %b = load <3 x i16>, ptr %pb, align 8
@@ -109,6 +124,13 @@ define void @load_add_store_v4i16(ptr %pa, ptr %pb) {
 ; MIN32-NEXT:    store <2 x i16> [[C_I0]], ptr [[PA]], align 8
 ; MIN32-NEXT:    store <2 x i16> [[C_I1]], ptr [[PA_I1]], align 4
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @load_add_store_v4i16(
+; MIN64-NEXT:    [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT:    [[B:%.*]] = load <4 x i16>, ptr [[PB:%.*]], align 8
+; MIN64-NEXT:    [[C:%.*]] = add <4 x i16> [[A]], [[B]]
+; MIN64-NEXT:    store <4 x i16> [[C]], ptr [[PA]], align 8
+; MIN64-NEXT:    ret void
 ;
   %a = load <4 x i16>, ptr %pa, align 8
   %b = load <4 x i16>, ptr %pb, align 8
@@ -153,6 +175,13 @@ define void @load_add_store_v4i10(ptr %pa, ptr %pb) {
 ; MIN32-NEXT:    [[C:%.*]] = insertelement <4 x i10> [[TMP1]], i10 [[C_I1]], i64 3
 ; MIN32-NEXT:    store <4 x i10> [[C]], ptr [[PA]], align 8
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @load_add_store_v4i10(
+; MIN64-NEXT:    [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT:    [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8
+; MIN64-NEXT:    [[C:%.*]] = add <4 x i10> [[A]], [[B]]
+; MIN64-NEXT:    store <4 x i10> [[C]], ptr [[PA]], align 8
+; MIN64-NEXT:    ret void
 ;
   %a = load <4 x i10>, ptr %pa, align 8
   %b = load <4 x i10>, ptr %pb, align 8
@@ -176,6 +205,10 @@ define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b,
 ; MIN32-LABEL: @select_uniform_condition_v2f16(
 ; MIN32-NEXT:    [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]]
 ; MIN32-NEXT:    ret <2 x half> [[R]]
+;
+; MIN64-LABEL: @select_uniform_condition_v2f16(
+; MIN64-NEXT:    [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]]
+; MIN64-NEXT:    ret <2 x half> [[R]]
 ;
   %r = select i1 %cc, <2 x half> %a, <2 x half> %b
   ret <2 x half> %r
@@ -207,6 +240,10 @@ define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b,
 ; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x half> [[R]]
+;
+; MIN64-LABEL: @select_uniform_condition_v3f16(
+; MIN64-NEXT:    [[R:%.*]] = select i1 [[CC:%.*]], <3 x half> [[A:%.*]], <3 x half> [[B:%.*]]
+; MIN64-NEXT:    ret <3 x half> [[R]]
 ;
   %r = select i1 %cc, <3 x half> %a, <3 x half> %b
   ret <3 x half> %r
@@ -243,6 +280,10 @@ define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b,
 ; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x half> [[R]]
+;
+; MIN64-LABEL: @select_uniform_condition_v4f16(
+; MIN64-NEXT:    [[R:%.*]] = select i1 [[CC:%.*]], <4 x half> [[A:%.*]], <4 x half> [[B:%.*]]
+; MIN64-NEXT:    ret <4 x half> [[R]]
 ;
   %r = select i1 %cc, <4 x half> %a, <4 x half> %b
   ret <4 x half> %r
@@ -270,6 +311,10 @@ define <2 x half> @unary_v2f16(<2 x half> %a) {
 ; MIN32-LABEL: @unary_v2f16(
 ; MIN32-NEXT:    [[R:%.*]] = fneg <2 x half> [[A:%.*]]
 ; MIN32-NEXT:    ret <2 x half> [[R]]
+;
+; MIN64-LABEL: @unary_v2f16(
+; MIN64-NEXT:    [[R:%.*]] = fneg <2 x half> [[A:%.*]]
+; MIN64-NEXT:    ret <2 x half> [[R]]
 ;
   %r = fneg <2 x half> %a
   ret <2 x half> %r
@@ -296,6 +341,10 @@ define <3 x half> @unary_v3f16(<3 x half> %a) {
 ; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x half> [[R]]
+;
+; MIN64-LABEL: @unary_v3f16(
+; MIN64-NEXT:    [[R:%.*]] = fneg <3 x half> [[A:%.*]]
+; MIN64-NEXT:    ret <3 x half> [[R]]
 ;
   %r = fneg <3 x half> %a
   ret <3 x half> %r
@@ -326,6 +375,10 @@ define <4 x half> @unary_v4f16(<4 x half> %a) {
 ; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x half> [[R]]
+;
+; MIN64-LABEL: @unary_v4f16(
+; MIN64-NEXT:    [[R:%.*]] = fneg <4 x half> [[A:%.*]]
+; MIN64-NEXT:    ret <4 x half> [[R]]
 ;
   %r = fneg <4 x half> %a
   ret <4 x half> %r
@@ -346,6 +399,10 @@ define <2 x half> @binary_v2f16(<2 x half> %a, <2 x half> %b) {
 ; MIN32-LABEL: @binary_v2f16(
 ; MIN32-NEXT:    [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]]
 ; MIN32-NEXT:    ret <2 x half> [[R]]
+;
+; MIN64-LABEL: @binary_v2f16(
+; MIN64-NEXT:    [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]]
+; MIN64-NEXT:    ret <2 x half> [[R]]
 ;
   %r = fadd <2 x half> %a, %b
   ret <2 x half> %r
@@ -377,6 +434,10 @@ define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) {
 ; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x half> [[R]]
+;
+; MIN64-LABEL: @binary_v3f16(
+; MIN64-NEXT:    [[R:%.*]] = fadd <3 x half> [[A:%.*]], [[B:%.*]]
+; MIN64-NEXT:    ret <3 x half> [[R]]
 ;
   %r = fadd <3 x half> %a, %b
   ret <3 x half> %r
@@ -413,6 +474,10 @@ define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) {
 ; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x half> [[R]]
+;
+; MIN64-LABEL: @binary_v4f16(
+; MIN64-NEXT:    [[R:%.*]] = fadd <4 x half> [[A:%.*]], [[B:%.*]]
+; MIN64-NEXT:    ret <4 x half> [[R]]
 ;
   %r = fadd <4 x half> %a, %b
   ret <4 x half> %r
@@ -431,6 +496,10 @@ define <2 x i16> @fptosi_v2f16(<2 x half> %a) {
 ; MIN32-LABEL: @fptosi_v2f16(
 ; MIN32-NEXT:    [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16>
 ; MIN32-NEXT:    ret <2 x i16> [[R]]
+;
+; MIN64-LABEL: @fptosi_v2f16(
+; MIN64-NEXT:    [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16>
+; MIN64-NEXT:    ret <2 x i16> [[R]]
 ;
   %r = fptosi <2 x half> %a to <2 x i16>
   ret <2 x i16> %r
@@ -457,6 +526,10 @@ define <3 x i16> @fptosi_v3f16(<3 x half> %a) {
 ; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x i16> [[R]]
+;
+; MIN64-LABEL: @fptosi_v3f16(
+; MIN64-NEXT:    [[R:%.*]] = fptosi <3 x half> [[A:%.*]] to <3 x i16>
+; MIN64-NEXT:    ret <3 x i16> [[R]]
 ;
   %r = fptosi <3 x half> %a to <3 x i16>
   ret <3 x i16> %r
@@ -487,6 +560,10 @@ define <4 x i16> @fptosi_v4f16(<4 x half> %a) {
 ; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x i16> [[R]]
+;
+; MIN64-LABEL: @fptosi_v4f16(
+; MIN64-NEXT:    [[R:%.*]] = fptosi <4 x half> [[A:%.*]] to <4 x i16>
+; MIN64-NEXT:    ret <4 x i16> [[R]]
 ;
   %r = fptosi <4 x half> %a to <4 x i16>
   ret <4 x i16> %r
@@ -511,6 +588,10 @@ define <4 x float> @fpext_v4f16(<4 x half> %a) {
 ; MIN32-LABEL: @fpext_v4f16(
 ; MIN32-NEXT:    [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float>
 ; MIN32-NEXT:    ret <4 x float> [[R]]
+;
+; MIN64-LABEL: @fpext_v4f16(
+; MIN64-NEXT:    [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float>
+; MIN64-NEXT:    ret <4 x float> [[R]]
 ;
   %r = fpext <4 x half> %a to <4 x float>
   ret <4 x float> %r
@@ -544,6 +625,10 @@ define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) {
 ; MIN32-LABEL: @gep1_v4(
 ; MIN32-NEXT:    [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]]
 ; MIN32-NEXT:    ret <4 x ptr> [[P]]
+;
+; MIN64-LABEL: @gep1_v4(
+; MIN64-NEXT:    [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]]
+; MIN64-NEXT:    ret <4 x ptr> [[P]]
 ;
   %p = getelementptr i32, ptr %base, <4 x i16> %a
   ret <4 x ptr> %p
@@ -592,6 +677,10 @@ define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) {
 ; MIN32-LABEL: @gep3_v4(
 ; MIN32-NEXT:    [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]]
 ; MIN32-NEXT:    ret <4 x ptr> [[P]]
+;
+; MIN64-LABEL: @gep3_v4(
+; MIN64-NEXT:    [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]]
+; MIN64-NEXT:    ret <4 x ptr> [[P]]
 ;
   %p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a
   ret <4 x ptr> %p
@@ -609,6 +698,11 @@ define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) {
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1
 ; MIN32-NEXT:    store <2 x i16> [[R]], ptr [[P:%.*]], align 4
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @insertelement_v2i16(
+; MIN64-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1
+; MIN64-NEXT:    store <2 x i16> [[R]], ptr [[P:%.*]], align 4
+; MIN64-NEXT:    ret void
 ;
   %r = insertelement <2 x i16> %a, i16 %b, i64 1
   store <2 x i16> %r, ptr %p
@@ -632,6 +726,11 @@ define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) {
 ; MIN32-NEXT:    store <2 x i16> [[A_I0]], ptr [[P]], align 8
 ; MIN32-NEXT:    store i16 [[B:%.*]], ptr [[P_I1]], align 4
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @insertelement_v3i16(
+; MIN64-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[A:%.*]], i16 [[B:%.*]], i64 2
+; MIN64-NEXT:    store <3 x i16> [[R]], ptr [[P:%.*]], align 8
+; MIN64-NEXT:    ret void
 ;
   %r = insertelement <3 x i16> %a, i16 %b, i64 2
   store <3 x i16> %r, ptr %p
@@ -660,6 +759,11 @@ define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) {
 ; MIN32-NEXT:    store <2 x i16> [[A_I0]], ptr [[P]], align 8
 ; MIN32-NEXT:    store <2 x i16> [[TMP1]], ptr [[P_I1]], align 4
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @insertelement_v4i16(
+; MIN64-NEXT:    [[R:%.*]] = insertelement <4 x i16> [[A:%.*]], i16 [[B:%.*]], i64 3
+; MIN64-NEXT:    store <4 x i16> [[R]], ptr [[P:%.*]], align 8
+; MIN64-NEXT:    ret void
 ;
   %r = insertelement <4 x i16> %a, i16 %b, i64 3
   store <4 x i16> %r, ptr %p
@@ -677,6 +781,11 @@ define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) {
 ; MIN32-NEXT:    [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1
 ; MIN32-NEXT:    ret <2 x i16> [[R]]
+;
+; MIN64-LABEL: @load_insertelement_v2i16(
+; MIN64-NEXT:    [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4
+; MIN64-NEXT:    [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1
+; MIN64-NEXT:    ret <2 x i16> [[R]]
 ;
   %a = load <2 x i16>, ptr %pa
   %r = insertelement <2 x i16> %a, i16 %b, i64 1
@@ -698,6 +807,11 @@ define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) {
 ; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[B:%.*]], i64 2
 ; MIN32-NEXT:    ret <3 x i16> [[R]]
+;
+; MIN64-LABEL: @load_insertelement_v3i16(
+; MIN64-NEXT:    [[A:%.*]] = load <3 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT:    [[R:%.*]] = insertelement <3 x i16> [[A]], i16 [[B:%.*]], i64 2
+; MIN64-NEXT:    ret <3 x i16> [[R]]
 ;
   %a = load <3 x i16>, ptr %pa
   %r = insertelement <3 x i16> %a, i16 %b, i64 2
@@ -726,6 +840,11 @@ define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) {
 ; MIN32-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x i16> [[R]]
+;
+; MIN64-LABEL: @load_insertelement_v4i16(
+; MIN64-NEXT:    [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT:    [[R:%.*]] = insertelement <4 x i16> [[A]], i16 [[B:%.*]], i64 3
+; MIN64-NEXT:    ret <4 x i16> [[R]]
 ;
   %a = load <4 x i16>, ptr %pa
   %r = insertelement <4 x i16> %a, i16 %b, i64 3
@@ -758,6 +877,13 @@ define void @shufflevector_grow(ptr %pa, ptr %pb) {
 ; MIN32-NEXT:    [[R_I1:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
 ; MIN32-NEXT:    store <2 x i16> [[R_I1]], ptr [[PA_I1]], align 4
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @shufflevector_grow(
+; MIN64-NEXT:    [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4
+; MIN64-NEXT:    [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 4
+; MIN64-NEXT:    [[R:%.*]] = shufflevector <2 x i16> [[A]], <2 x i16> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; MIN64-NEXT:    store <4 x i16> [[R]], ptr [[PA]], align 8
+; MIN64-NEXT:    ret void
 ;
   %a = load <2 x i16>, ptr %pa
   %b = load <2 x i16>, ptr %pb
@@ -786,6 +912,12 @@ define void @shufflevector_shrink(ptr %pa) {
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
 ; MIN32-NEXT:    store <2 x i16> [[R]], ptr [[PA]], align 4
 ; MIN32-NEXT:    ret void
+;
+; MIN64-LABEL: @shufflevector_shrink(
+; MIN64-NEXT:    [[A:%.*]] = load <4 x i16>, ptr [[PA:%.*]], align 8
+; MIN64-NEXT:    [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
+; MIN64-NEXT:    store <2 x i16> [[R]], ptr [[PA]], align 4
+; MIN64-NEXT:    ret void
 ;
   %a = load <4 x i16>, ptr %pa
   %r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
@@ -832,6 +964,22 @@ define void @phi_v2f16(ptr %base, i64 %bound) {
 ; MIN32-NEXT:    store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4
 ; MIN32-NEXT:    ret void
 ;
+; MIN64-LABEL: @phi_v2f16(
+; MIN64-NEXT:  entry:
+; MIN64-NEXT:    br label [[LOOP:%.*]]
+; MIN64:       loop:
+; MIN64-NEXT:    [[X:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT:    [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i64 [[IDX]]
+; MIN64-NEXT:    [[A:%.*]] = load <2 x half>, ptr [[P]], align 2
+; MIN64-NEXT:    [[X_NEXT]] = fadd <2 x half> [[X]], [[A]]
+; MIN64-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
+; MIN64-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; MIN64-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; MIN64:       end:
+; MIN64-NEXT:    store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4
+; MIN64-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -901,6 +1049,22 @@ define void @phi_v3f16(ptr %base, i64 %bound) {
 ; MIN32-NEXT:    store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 4
 ; MIN32-NEXT:    ret void
 ;
+; MIN64-LABEL: @phi_v3f16(
+; MIN64-NEXT:  entry:
+; MIN64-NEXT:    br label [[LOOP:%.*]]
+; MIN64:       loop:
+; MIN64-NEXT:    [[X:%.*]] = phi <3 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT:    [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE:%.*]], i64 [[IDX]]
+; MIN64-NEXT:    [[A:%.*]] = load <3 x half>, ptr [[P]], align 2
+; MIN64-NEXT:    [[X_NEXT]] = fadd <3 x half> [[X]], [[A]]
+; MIN64-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
+; MIN64-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; MIN64-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; MIN64:       end:
+; MIN64-NEXT:    store <3 x half> [[X_NEXT]], ptr [[BASE]], align 8
+; MIN64-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -976,6 +1140,22 @@ define void @phi_v4f16(ptr %base, i64 %bound) {
 ; MIN32-NEXT:    store <2 x half> [[X_NEXT_I1]], ptr [[BASE_I1]], align 4
 ; MIN32-NEXT:    ret void
 ;
+; MIN64-LABEL: @phi_v4f16(
+; MIN64-NEXT:  entry:
+; MIN64-NEXT:    br label [[LOOP:%.*]]
+; MIN64:       loop:
+; MIN64-NEXT:    [[X:%.*]] = phi <4 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; MIN64-NEXT:    [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE:%.*]], i64 [[IDX]]
+; MIN64-NEXT:    [[A:%.*]] = load <4 x half>, ptr [[P]], align 2
+; MIN64-NEXT:    [[X_NEXT]] = fadd <4 x half> [[X]], [[A]]
+; MIN64-NEXT:    [[IDX_NEXT]] = add i64 [[IDX]], 1
+; MIN64-NEXT:    [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
+; MIN64-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
+; MIN64:       end:
+; MIN64-NEXT:    store <4 x half> [[X_NEXT]], ptr [[BASE]], align 8
+; MIN64-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -1009,6 +1189,10 @@ define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) {
 ; MIN32-LABEL: @call_v2f16(
 ; MIN32-NEXT:    [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]])
 ; MIN32-NEXT:    ret <2 x half> [[R]]
+;
+; MIN64-LABEL: @call_v2f16(
+; MIN64-NEXT:    [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]])
+; MIN64-NEXT:    ret <2 x half> [[R]]
 ;
   %r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
   ret <2 x half> %r
@@ -1040,6 +1224,10 @@ define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) {
 ; MIN32-NEXT:    [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
 ; MIN32-NEXT:    ret <3 x half> [[R]]
+;
+; MIN64-LABEL: @call_v3f16(
+; MIN64-NEXT:    [[R:%.*]] = call <3 x half> @llvm.minnum.v3f16(<3 x half> [[A:%.*]], <3 x half> [[B:%.*]])
+; MIN64-NEXT:    ret <3 x half> [[R]]
 ;
   %r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b)
   ret <3 x half> %r
@@ -1076,6 +1264,10 @@ define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
 ; MIN32-NEXT:    [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; MIN32-NEXT:    [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; MIN32-NEXT:    ret <4 x half> [[R]]
+;
+; MIN64-LABEL: @call_v4f16(
+; MIN64-NEXT:    [[R:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]])
+; MIN64-NEXT:    ret <4 x half> [[R]]
 ;
   %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
   ret <4 x half> %r
@@ -1108,6 +1300,11 @@ define noundef <2 x half> @frexp_v2f16(<2 x half> noundef %h) {
 ; MIN32-NEXT:    [[R:%.*]] = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> [[H:%.*]])
 ; MIN32-NEXT:    [[E0:%.*]] = extractvalue { <2 x half>, <2 x i32> } [[R]], 0
 ; MIN32-NEXT:    ret <2 x half> [[E0]]
+;
+; MIN64-LABEL: @frexp_v2f16(
+; MIN64-NEXT:    [[R:%.*]] = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> [[H:%.*]])
+; MIN64-NEXT:    [[E0:%.*]] = extractvalue { <2 x half>, <2 x i32> } [[R]], 0
+; MIN64-NEXT:    ret <2 x half> [[E0]]
 ;
   %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %h)
   %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0



More information about the llvm-commits mailing list