[llvm] [VectorUtils] Trivially vectorize ldexp, [l]lround (PR #145545)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 29 06:37:28 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/145545
>From 5c8064d964a46c23986ac88914f19c9c6e003c7e Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 24 Jun 2025 17:04:24 +0100
Subject: [PATCH 1/2] [VectorUtils] Trivially vectorize ldexp, [l]lround
---
llvm/lib/Analysis/VectorUtils.cpp | 6 +
.../Transforms/LoopVectorize/intrinsic.ll | 201 ++++++++++++++++++
2 files changed, 207 insertions(+)
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 63fccee63c0ae..a391e92e84fc6 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -81,6 +81,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::exp:
case Intrinsic::exp10:
case Intrinsic::exp2:
+ case Intrinsic::ldexp:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
@@ -108,6 +109,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::canonicalize:
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
+ case Intrinsic::lround:
+ case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint:
case Intrinsic::ucmp:
@@ -189,6 +192,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
switch (ID) {
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
+ case Intrinsic::lround:
+ case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint:
case Intrinsic::vp_lrint:
@@ -203,6 +208,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
case Intrinsic::vp_is_fpclass:
return OpdIdx == 0;
case Intrinsic::powi:
+ case Intrinsic::ldexp:
return OpdIdx == -1 || OpdIdx == 1;
default:
return OpdIdx == -1;
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 9c910d70807a1..10d83a456d0e2 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -324,6 +324,56 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.exp2.f64(double)
+define void @ldexp_f32i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
+; CHECK-LABEL: @ldexp_f32i32(
+; CHECK: llvm.ldexp.v4f32.v4i32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call float @llvm.ldexp.f32.i32(float %0, i32 %exp)
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i32 %iv
+ store float %call, ptr %arrayidx2, align 4
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @llvm.ldexp.f32.i32(float, i32)
+
+define void @ldexp_f64i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
+; CHECK-LABEL: @ldexp_f64i32(
+; CHECK: llvm.ldexp.v4f64.v4i32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call double @llvm.ldexp.f64.i32(double %0, i32 %exp)
+ %arrayidx2 = getelementptr inbounds double, ptr %x, i32 %iv
+ store double %call, ptr %arrayidx2, align 8
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare double @llvm.ldexp.f64.i32(double, i32)
+
define void @log_f32(i32 %n, ptr %y, ptr %x) {
; CHECK-LABEL: @log_f32(
; CHECK: llvm.log.v4f32
@@ -976,6 +1026,157 @@ for.end: ; preds = %for.body, %entry
declare double @llvm.roundeven.f64(double)
+
+define void @lround_i32f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i32f32(
+; CHECK: llvm.lround.v4i32.v4f32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call i32 @llvm.lround.i32.f32(float %0)
+ %arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
+ store i32 %call, ptr %arrayidx2, align 4
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i32 @llvm.lround.i32.f32(float)
+
+define void @lround_i32f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i32f64(
+; CHECK: llvm.lround.v4i32.v4f64
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call i32 @llvm.lround.i32.f64(double %0)
+ %arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
+ store i32 %call, ptr %arrayidx2, align 8
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i32 @llvm.lround.i32.f64(double)
+
+define void @lround_i64f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i64f32(
+; CHECK: llvm.lround.v4i64.v4f32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call i64 @llvm.lround.i64.f32(float %0)
+ %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+ store i64 %call, ptr %arrayidx2, align 4
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.lround.i64.f32(float)
+
+define void @lround_i64f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @lround_i64f64(
+; CHECK: llvm.lround.v4i64.v4f64
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call i64 @llvm.lround.i64.f64(double %0)
+ %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+ store i64 %call, ptr %arrayidx2, align 8
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.lround.i64.f64(double)
+
+define void @llround_i64f32(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @llround_i64f32(
+; CHECK: llvm.llround.v4i64.v4f32
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call i64 @llvm.llround.i64.f32(float %0)
+ %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+ store i64 %call, ptr %arrayidx2, align 4
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.llround.i64.f32(float)
+
+define void @llround_i64f64(i32 %n, ptr %y, ptr %x) {
+; CHECK-LABEL: @llround_i64f64(
+; CHECK: llvm.llround.v4i64.v4f64
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
+ %0 = load double, ptr %arrayidx, align 8
+ %call = tail call i64 @llvm.llround.i64.f64(double %0)
+ %arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
+ store i64 %call, ptr %arrayidx2, align 8
+ %iv.next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %iv.next, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare i64 @llvm.llround.i64.f64(double)
+
define void @fma_f32(i32 %n, ptr %y, ptr %x, ptr %z, ptr %w) {
; CHECK-LABEL: @fma_f32(
; CHECK: llvm.fma.v4f32
>From 618a972e578543af1b02adf9d1d2461705af5698 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Sun, 29 Jun 2025 14:11:14 +0100
Subject: [PATCH 2/2] [SLPVectorizer/Scalarizer] Add tests for [l]lround, ldexp
---
.../Transforms/SLPVectorizer/AArch64/exp.ll | 97 ++++++
.../SLPVectorizer/AArch64/fround.ll | 280 ++++++++++++++++++
llvm/test/Transforms/Scalarizer/intrinsics.ll | 47 +++
3 files changed, 424 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll
create mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll
new file mode 100644
index 0000000000000..dcb80c9f0e0a9
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/exp.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @ldexp_f32i32(ptr %x, ptr %y, i32 %exp, i32 %n) {
+; CHECK-LABEL: @ldexp_f32i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L0]], i32 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L2]], i32 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L4]], i32 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call float @llvm.ldexp.f32.i32(float [[L6]], i32 [[EXP]])
+; CHECK-NEXT: store float [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 1
+; CHECK-NEXT: store float [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 2
+; CHECK-NEXT: store float [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 3
+; CHECK-NEXT: store float [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load float, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+ %l2 = load float, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+ %l4 = load float, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+ %l6 = load float, ptr %arrayidx.3, align 4
+ %l1 = tail call float @llvm.ldexp.f32.i32(float %l0, i32 %exp)
+ %l3 = tail call float @llvm.ldexp.f32.i32(float %l2, i32 %exp)
+ %l5 = tail call float @llvm.ldexp.f32.i32(float %l4, i32 %exp)
+ %l7 = tail call float @llvm.ldexp.f32.i32(float %l6, i32 %exp)
+ store float %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds float, ptr %y, i64 1
+ store float %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds float, ptr %y, i64 2
+ store float %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds float, ptr %y, i64 3
+ store float %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @ldexp_f64i32(ptr %x, ptr %y, i32 %exp, i32 %n) {
+; CHECK-LABEL: @ldexp_f64i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L0]], i32 [[EXP:%.*]])
+; CHECK-NEXT: [[L3:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L2]], i32 [[EXP]])
+; CHECK-NEXT: [[L5:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L4]], i32 [[EXP]])
+; CHECK-NEXT: [[L7:%.*]] = tail call double @llvm.ldexp.f64.i32(double [[L6]], i32 [[EXP]])
+; CHECK-NEXT: store double [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 1
+; CHECK-NEXT: store double [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 2
+; CHECK-NEXT: store double [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 3
+; CHECK-NEXT: store double [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load double, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+ %l2 = load double, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+ %l4 = load double, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+ %l6 = load double, ptr %arrayidx.3, align 4
+ %l1 = tail call double @llvm.ldexp.f64.i32(double %l0, i32 %exp)
+ %l3 = tail call double @llvm.ldexp.f64.i32(double %l2, i32 %exp)
+ %l5 = tail call double @llvm.ldexp.f64.i32(double %l4, i32 %exp)
+ %l7 = tail call double @llvm.ldexp.f64.i32(double %l6, i32 %exp)
+ store double %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds double, ptr %y, i64 1
+ store double %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds double, ptr %y, i64 2
+ store double %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds double, ptr %y, i64 3
+ store double %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+declare float @llvm.ldexp.f32.i32(float, i32)
+declare double @llvm.ldexp.f64.i32(double, i32)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll
new file mode 100644
index 0000000000000..07a3fe7d0bbc5
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/fround.ll
@@ -0,0 +1,280 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @lround_i32f32(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @lround_i32f32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i32 @llvm.lround.i32.f32(float [[L6]])
+; CHECK-NEXT: store i32 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1
+; CHECK-NEXT: store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2
+; CHECK-NEXT: store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3
+; CHECK-NEXT: store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load float, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+ %l2 = load float, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+ %l4 = load float, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+ %l6 = load float, ptr %arrayidx.3, align 4
+ %l1 = tail call i32 @llvm.lround.i32.f32(float %l0)
+ %l3 = tail call i32 @llvm.lround.i32.f32(float %l2)
+ %l5 = tail call i32 @llvm.lround.i32.f32(float %l4)
+ %l7 = tail call i32 @llvm.lround.i32.f32(float %l6)
+ store i32 %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
+ store i32 %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
+ store i32 %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
+ store i32 %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @lround_i32f64(ptr %x, ptr %y, i32 %n) {
+; CHECK-LABEL: @lround_i32f64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i32 @llvm.lround.i32.f64(double [[L6]])
+; CHECK-NEXT: store i32 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 1
+; CHECK-NEXT: store i32 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 2
+; CHECK-NEXT: store i32 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[Y]], i64 3
+; CHECK-NEXT: store i32 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load double, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+ %l2 = load double, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+ %l4 = load double, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+ %l6 = load double, ptr %arrayidx.3, align 4
+ %l1 = tail call i32 @llvm.lround.i32.f64(double %l0)
+ %l3 = tail call i32 @llvm.lround.i32.f64(double %l2)
+ %l5 = tail call i32 @llvm.lround.i32.f64(double %l4)
+ %l7 = tail call i32 @llvm.lround.i32.f64(double %l6)
+ store i32 %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds i32, ptr %y, i64 1
+ store i32 %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds i32, ptr %y, i64 2
+ store i32 %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds i32, ptr %y, i64 3
+ store i32 %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @lround_i64f32(ptr %x, ptr %y, i64 %n) {
+; CHECK-LABEL: @lround_i64f32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.lround.i64.f32(float [[L6]])
+; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1
+; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2
+; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3
+; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load float, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+ %l2 = load float, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+ %l4 = load float, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+ %l6 = load float, ptr %arrayidx.3, align 4
+ %l1 = tail call i64 @llvm.lround.i64.f32(float %l0)
+ %l3 = tail call i64 @llvm.lround.i64.f32(float %l2)
+ %l5 = tail call i64 @llvm.lround.i64.f32(float %l4)
+ %l7 = tail call i64 @llvm.lround.i64.f32(float %l6)
+ store i64 %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1
+ store i64 %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2
+ store i64 %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3
+ store i64 %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @lround_i64f64(ptr %x, ptr %y, i64 %n) {
+; CHECK-LABEL: @lround_i64f64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.lround.i64.f64(double [[L6]])
+; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1
+; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2
+; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3
+; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load double, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+ %l2 = load double, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+ %l4 = load double, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+ %l6 = load double, ptr %arrayidx.3, align 4
+ %l1 = tail call i64 @llvm.lround.i64.f64(double %l0)
+ %l3 = tail call i64 @llvm.lround.i64.f64(double %l2)
+ %l5 = tail call i64 @llvm.lround.i64.f64(double %l4)
+ %l7 = tail call i64 @llvm.lround.i64.f64(double %l6)
+ store i64 %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1
+ store i64 %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2
+ store i64 %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3
+ store i64 %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @llround_i64f32(ptr %x, ptr %y, i64 %n) {
+; CHECK-LABEL: @llround_i64f32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.llround.i64.f32(float [[L6]])
+; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1
+; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2
+; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3
+; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load float, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 1
+ %l2 = load float, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, ptr %x, i64 2
+ %l4 = load float, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, ptr %x, i64 3
+ %l6 = load float, ptr %arrayidx.3, align 4
+ %l1 = tail call i64 @llvm.llround.i64.f32(float %l0)
+ %l3 = tail call i64 @llvm.llround.i64.f32(float %l2)
+ %l5 = tail call i64 @llvm.llround.i64.f32(float %l4)
+ %l7 = tail call i64 @llvm.llround.i64.f32(float %l6)
+ store i64 %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1
+ store i64 %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2
+ store i64 %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3
+ store i64 %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+define void @llround_i64f64(ptr %x, ptr %y, i64 %n) {
+; CHECK-LABEL: @llround_i64f64(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[X:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 1
+; CHECK-NEXT: [[L2:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 2
+; CHECK-NEXT: [[L4:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 3
+; CHECK-NEXT: [[L6:%.*]] = load double, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[L1:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L0]])
+; CHECK-NEXT: [[L3:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L2]])
+; CHECK-NEXT: [[L5:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L4]])
+; CHECK-NEXT: [[L7:%.*]] = tail call i64 @llvm.llround.i64.f64(double [[L6]])
+; CHECK-NEXT: store i64 [[L1]], ptr [[Y:%.*]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 1
+; CHECK-NEXT: store i64 [[L3]], ptr [[ARRAYIDX2_1]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 2
+; CHECK-NEXT: store i64 [[L5]], ptr [[ARRAYIDX2_2]], align 4
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i64, ptr [[Y]], i64 3
+; CHECK-NEXT: store i64 [[L7]], ptr [[ARRAYIDX2_3]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %l0 = load double, ptr %x, align 4
+ %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 1
+ %l2 = load double, ptr %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds double, ptr %x, i64 2
+ %l4 = load double, ptr %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds double, ptr %x, i64 3
+ %l6 = load double, ptr %arrayidx.3, align 4
+ %l1 = tail call i64 @llvm.llround.i64.f64(double %l0)
+ %l3 = tail call i64 @llvm.llround.i64.f64(double %l2)
+ %l5 = tail call i64 @llvm.llround.i64.f64(double %l4)
+ %l7 = tail call i64 @llvm.llround.i64.f64(double %l6)
+ store i64 %l1, ptr %y, align 4
+ %arrayidx2.1 = getelementptr inbounds i64, ptr %y, i64 1
+ store i64 %l3, ptr %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds i64, ptr %y, i64 2
+ store i64 %l5, ptr %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds i64, ptr %y, i64 3
+ store i64 %l7, ptr %arrayidx2.3, align 4
+ ret void
+}
+
+declare i32 @llvm.lround.i32.f32(float)
+declare i64 @llvm.lround.i64.f32(float)
+declare i64 @llvm.lround.i64.f64(double)
+declare i64 @llvm.llround.i64.f32(float)
+declare i64 @llvm.llround.i64.f64(double)
diff --git a/llvm/test/Transforms/Scalarizer/intrinsics.ll b/llvm/test/Transforms/Scalarizer/intrinsics.ll
index cee44ef434260..070c765294ba1 100644
--- a/llvm/test/Transforms/Scalarizer/intrinsics.ll
+++ b/llvm/test/Transforms/Scalarizer/intrinsics.ll
@@ -8,6 +8,7 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
+declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>)
; Ternary fp
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
@@ -32,6 +33,8 @@ declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)
; Unary fp operand, int return type
declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float>)
declare <2 x i32> @llvm.llrint.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float>)
+declare <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float>)
; Bool return type, overloaded on fp operand type
declare <2 x i1> @llvm.is.fpclass(<2 x float>, i32)
@@ -159,6 +162,22 @@ define <2 x float> @scalarize_powi_v2f32(<2 x float> %x, i32 %y) #0 {
ret <2 x float> %powi
}
+define <2 x float> @scalarize_ldexp_v2f32(<2 x float> %x, <2 x i32> %y) #0 {
+; CHECK-LABEL: @scalarize_ldexp_v2f32(
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-NEXT: [[Y:%.*]] = extractelement <2 x i32> [[Y1:%.*]], i64 0
+; CHECK-NEXT: [[POWI_I0:%.*]] = call float @llvm.ldexp.f32.i32(float [[X_I0]], i32 [[Y]])
+; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
+; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x i32> [[Y1]], i64 1
+; CHECK-NEXT: [[POWI_I1:%.*]] = call float @llvm.ldexp.f32.i32(float [[X_I1]], i32 [[Y_I1]])
+; CHECK-NEXT: [[POWI_UPTO0:%.*]] = insertelement <2 x float> poison, float [[POWI_I0]], i64 0
+; CHECK-NEXT: [[POWI:%.*]] = insertelement <2 x float> [[POWI_UPTO0]], float [[POWI_I1]], i64 1
+; CHECK-NEXT: ret <2 x float> [[POWI]]
+;
+ %powi = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> %y)
+ ret <2 x float> %powi
+}
+
define <2 x i32> @scalarize_smul_fix_sat_v2i32(<2 x i32> %x) #0 {
; CHECK-LABEL: @scalarize_smul_fix_sat_v2i32(
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x i32> [[X:%.*]], i64 0
@@ -243,6 +262,34 @@ define <2 x i32> @scalarize_llrint(<2 x float> %x) #0 {
ret <2 x i32> %rnd
}
+define <2 x i32> @scalarize_lround(<2 x float> %x) #0 {
+; CHECK-LABEL: @scalarize_lround(
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.lround.i32.f32(float [[X_I0]])
+; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
+; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.lround.i32.f32(float [[X_I1]])
+; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0
+; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1
+; CHECK-NEXT: ret <2 x i32> [[RND]]
+;
+ %rnd = call <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float> %x)
+ ret <2 x i32> %rnd
+}
+
+define <2 x i32> @scalarize_llround(<2 x float> %x) #0 {
+; CHECK-LABEL: @scalarize_llround(
+; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
+; CHECK-NEXT: [[RND_I0:%.*]] = call i32 @llvm.llround.i32.f32(float [[X_I0]])
+; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
+; CHECK-NEXT: [[RND_I1:%.*]] = call i32 @llvm.llround.i32.f32(float [[X_I1]])
+; CHECK-NEXT: [[RND_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RND_I0]], i64 0
+; CHECK-NEXT: [[RND:%.*]] = insertelement <2 x i32> [[RND_UPTO0]], i32 [[RND_I1]], i64 1
+; CHECK-NEXT: ret <2 x i32> [[RND]]
+;
+ %rnd = call <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float> %x)
+ ret <2 x i32> %rnd
+}
+
define <2 x i1> @scalarize_is_fpclass(<2 x float> %x) #0 {
; CHECK-LABEL: @scalarize_is_fpclass(
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
More information about the llvm-commits
mailing list