[llvm] LegalizeVectorTypes: fix bug in widening of vec result in xrint (PR #71198)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 3 10:42:08 PDT 2023


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/71198

>From c12b31aff77bd1a1b6fc29eb59c2f31a0cd529e3 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <Ramkumar.Ramachandra at imgtec.com>
Date: Fri, 3 Nov 2023 15:50:02 +0000
Subject: [PATCH 1/2] LegalizeVectorTypes: fix bug in widening of vec result in
 xrint

Fix a bug introduced in 98c90a1 (ISel: introduce vector ISD::LRINT,
ISD::LLRINT; custom RISCV lowering), where ISD::LRINT and ISD::LLRINT
used WidenVecRes_Unary to widen the vector result. This leads to
incorrect CodeGen for RISC-V fixed-vectors of length 3, and a crash in
SelectionDAG when we try to lower llvm.lrint.vxi32.vxf64 on i686. Fix
the bug by implementing a correct WidenVecRes_XRINT.

Fixes #71187.
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  28 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll |  17 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll  |   7 +-
 llvm/test/CodeGen/X86/vector-lrint.ll         | 421 ++++++++----------
 5 files changed, 231 insertions(+), 243 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c48d6c4adf61517..f85c1296cdce856 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -987,6 +987,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_Convert(SDNode *N);
   SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
   SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
+  SDValue WidenVecRes_XRINT(SDNode *N);
   SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
   SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
   SDValue WidenVecRes_ExpOp(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 02d9ce4f0a44d4b..a1a9f0f0615cbc7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4204,6 +4204,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     Res = WidenVecRes_FP_TO_XINT_SAT(N);
     break;
 
+  case ISD::LRINT:
+  case ISD::LLRINT:
+    Res = WidenVecRes_XRINT(N);
+    break;
+
   case ISD::FABS:
   case ISD::FCEIL:
   case ISD::FCOS:
@@ -4216,8 +4221,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FLOG2:
   case ISD::FNEARBYINT:
   case ISD::FRINT:
-  case ISD::LRINT:
-  case ISD::LLRINT:
   case ISD::FROUND:
   case ISD::FROUNDEVEN:
   case ISD::FSIN:
@@ -4791,6 +4794,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
   return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) {
+  SDLoc dl(N);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  ElementCount WidenNumElts = WidenVT.getVectorElementCount();
+
+  SDValue Src = N->getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  // Widen the source operand too when its type action is TypeWidenVector.
+  if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
+    Src = GetWidenedVector(Src);
+    SrcVT = Src.getValueType();
+  }
+
+  // Source and result element counts differ after widening: unroll instead.
+  if (WidenNumElts != SrcVT.getVectorElementCount())
+    return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
+
+  return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
   SDValue InOp = N->getOperand(1);
   SDLoc DL(N);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index ff83e7c8c32ae9b..5d34cd6592702e2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -150,7 +150,22 @@ define <3 x i64> @llrint_v3i64_v3f32(<3 x float> %x) {
 ; RV32-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslidedown.vi v8, v8, 2
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vslidedown.vi v8, v8, 3
+; RV32-NEXT:    vfmv.f.s fa0, v8
+; RV32-NEXT:    call llrintf@plt
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl2r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 2
 ; RV32-NEXT:    add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index cd4eec44920c951..7cb864546cebcb9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -111,11 +111,14 @@ define <3 x iXLen> @lrint_v3f32(<3 x float> %x) {
 ; RV64-i32-NEXT:    vfmv.f.s fa5, v10
 ; RV64-i32-NEXT:    fcvt.l.s a0, fa5
 ; RV64-i32-NEXT:    vslide1down.vx v9, v9, a0
-; RV64-i32-NEXT:    vslidedown.vi v8, v8, 2
+; RV64-i32-NEXT:    vslidedown.vi v10, v8, 2
+; RV64-i32-NEXT:    vfmv.f.s fa5, v10
+; RV64-i32-NEXT:    fcvt.l.s a0, fa5
+; RV64-i32-NEXT:    vslide1down.vx v9, v9, a0
+; RV64-i32-NEXT:    vslidedown.vi v8, v8, 3
 ; RV64-i32-NEXT:    vfmv.f.s fa5, v8
 ; RV64-i32-NEXT:    fcvt.l.s a0, fa5
 ; RV64-i32-NEXT:    vslide1down.vx v8, v9, a0
-; RV64-i32-NEXT:    vslidedown.vi v8, v8, 1
 ; RV64-i32-NEXT:    ret
 ;
 ; RV64-i64-LABEL: lrint_v3f32:
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 7373cd32df98d49..43d12fdeffa9a91 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -2,12 +2,10 @@
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefix=X86-AVX
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefix=X86-AVX
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64-SSE
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX-i32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX-i32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX-i64
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX-i64
 
 define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; X86-SSE2-LABEL: lrint_v1f32:
@@ -19,6 +17,16 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; X86-AVX:       # %bb.0:
 ; X86-AVX-NEXT:    vcvtss2si {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    retl
+;
+; X64-AVX-i32-LABEL: lrint_v1f32:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %eax
+; X64-AVX-i32-NEXT:    retq
+;
+; X64-AVX-i64-LABEL: lrint_v1f32:
+; X64-AVX-i64:       # %bb.0:
+; X64-AVX-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX-i64-NEXT:    retq
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
   ret <1 x iXLen> %a
 }
@@ -60,6 +68,31 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 ; X86-AVX-NEXT:    vcvtss2si %xmm0, %eax
 ; X86-AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
 ; X86-AVX-NEXT:    retl
+;
+; X64-AVX-i32-LABEL: lrint_v2f32:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm1, %eax
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %ecx
+; X64-AVX-i32-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-i32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm2, %eax
+; X64-AVX-i32-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %eax
+; X64-AVX-i32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; X64-AVX-i32-NEXT:    retq
+;
+; X64-AVX-i64-LABEL: lrint_v2f32:
+; X64-AVX-i64:       # %bb.0:
+; X64-AVX-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-AVX-i64-NEXT:    retq
   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
   ret <2 x iXLen> %a
 }
@@ -101,6 +134,21 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 ; X86-AVX-NEXT:    vcvtss2si %xmm0, %eax
 ; X86-AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
 ; X86-AVX-NEXT:    retl
+;
+; X64-AVX-i32-LABEL: lrint_v4f32:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm1, %eax
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %ecx
+; X64-AVX-i32-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-i32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm2, %eax
+; X64-AVX-i32-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %eax
+; X64-AVX-i32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; X64-AVX-i32-NEXT:    retq
   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
   ret <4 x iXLen> %a
 }
@@ -154,193 +202,131 @@ define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
 
-define <1 x i64> @lrint_v1f64(<1 x double> %x) {
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
 ; X86-SSE2-LABEL: lrint_v1f64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
-; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT:    andl $-8, %esp
-; X86-SSE2-NEXT:    subl $8, %esp
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
-; X86-SSE2-NEXT:    fldl (%esp)
-; X86-SSE2-NEXT:    fistpll (%esp)
-; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movd %xmm0, %eax
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X86-SSE2-NEXT:    movd %xmm0, %edx
-; X86-SSE2-NEXT:    movl %ebp, %esp
-; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: lrint_v1f64:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    pushl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %ebp, -8
-; X86-AVX-NEXT:    movl %esp, %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
-; X86-AVX-NEXT:    andl $-8, %esp
-; X86-AVX-NEXT:    subl $8, %esp
-; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
-; X86-AVX-NEXT:    fldl (%esp)
-; X86-AVX-NEXT:    fistpll (%esp)
-; X86-AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
-; X86-AVX-NEXT:    vmovd %xmm0, %eax
-; X86-AVX-NEXT:    vpextrd $1, %xmm0, %edx
-; X86-AVX-NEXT:    movl %ebp, %esp
-; X86-AVX-NEXT:    popl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT:    vcvtsd2si {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    retl
 ;
-; X64-SSE-LABEL: lrint_v1f64:
-; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT:    retq
+; X64-AVX-i32-LABEL: lrint_v1f64:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX-i32-NEXT:    retq
 ;
-; X64-AVX-LABEL: lrint_v1f64:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vcvtsd2si %xmm0, %rax
-; X64-AVX-NEXT:    retq
-  %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x)
-  ret <1 x i64> %a
+; X64-AVX-i64-LABEL: lrint_v1f64:
+; X64-AVX-i64:       # %bb.0:
+; X64-AVX-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX-i64-NEXT:    retq
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
+  ret <1 x iXLen> %a
 }
-declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>)
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
 
-define <2 x i64> @lrint_v2f64(<2 x double> %x) {
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; X86-SSE2-LABEL: lrint_v2f64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
-; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT:    andl $-8, %esp
-; X86-SSE2-NEXT:    subl $16, %esp
-; X86-SSE2-NEXT:    movhps %xmm0, (%esp)
-; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl (%esp)
-; X86-SSE2-NEXT:    fistpll (%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X86-SSE2-NEXT:    movl %ebp, %esp
-; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: lrint_v2f64:
 ; X86-AVX:       # %bb.0:
-; X86-AVX-NEXT:    pushl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %ebp, -8
-; X86-AVX-NEXT:    movl %esp, %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
-; X86-AVX-NEXT:    andl $-8, %esp
-; X86-AVX-NEXT:    subl $16, %esp
-; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
-; X86-AVX-NEXT:    vmovhps %xmm0, (%esp)
-; X86-AVX-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-AVX-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-AVX-NEXT:    fldl (%esp)
-; X86-AVX-NEXT:    fistpll (%esp)
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; X86-AVX-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
-; X86-AVX-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; X86-AVX-NEXT:    movl %ebp, %esp
-; X86-AVX-NEXT:    popl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; X86-AVX-NEXT:    vcvtsd2si %xmm1, %eax
+; X86-AVX-NEXT:    vcvtsd2si %xmm0, %ecx
+; X86-AVX-NEXT:    vmovd %ecx, %xmm0
+; X86-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; X86-AVX-NEXT:    retl
 ;
-; X64-SSE-LABEL: lrint_v2f64:
-; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm1
-; X64-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X64-SSE-NEXT:    cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm0
-; X64-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; X64-SSE-NEXT:    movdqa %xmm1, %xmm0
-; X64-SSE-NEXT:    retq
+; X64-AVX-i32-LABEL: lrint_v2f64:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; X64-AVX-i32-NEXT:    vcvtsd2si %xmm1, %eax
+; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %ecx
+; X64-AVX-i32-NEXT:    vmovd %ecx, %xmm0
+; X64-AVX-i32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT:    retq
 ;
-; X64-AVX-LABEL: lrint_v2f64:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vcvtsd2si %xmm0, %rax
-; X64-AVX-NEXT:    vmovq %rax, %xmm1
-; X64-AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
-; X64-AVX-NEXT:    vcvtsd2si %xmm0, %rax
-; X64-AVX-NEXT:    vmovq %rax, %xmm0
-; X64-AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; X64-AVX-NEXT:    retq
-  %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x)
-  ret <2 x i64> %a
+; X64-AVX-i64-LABEL: lrint_v2f64:
+; X64-AVX-i64:       # %bb.0:
+; X64-AVX-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-AVX-i64-NEXT:    retq
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
+  ret <2 x iXLen> %a
 }
-declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
 
-define <4 x i64> @lrint_v4f64(<4 x double> %x) {
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; X86-SSE2-LABEL: lrint_v4f64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
-; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT:    andl $-8, %esp
-; X86-SSE2-NEXT:    subl $32, %esp
-; X86-SSE2-NEXT:    movhps %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movhps %xmm1, (%esp)
-; X86-SSE2-NEXT:    movlps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl (%esp)
-; X86-SSE2-NEXT:    fistpll (%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X86-SSE2-NEXT:    movl %ebp, %esp
-; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm2
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-SSE-LABEL: lrint_v4f64:
-; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm2
-; X64-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X64-SSE-NEXT:    cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm0
-; X64-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; X64-SSE-NEXT:    cvtsd2si %xmm1, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm3
-; X64-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X64-SSE-NEXT:    cvtsd2si %xmm1, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm0
-; X64-SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; X64-SSE-NEXT:    movdqa %xmm2, %xmm0
-; X64-SSE-NEXT:    movdqa %xmm3, %xmm1
-; X64-SSE-NEXT:    retq
-  %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x)
-  ret <4 x i64> %a
+; X86-AVX-LABEL: lrint_v4f64:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; X86-AVX-NEXT:    vcvtsd2si %xmm1, %eax
+; X86-AVX-NEXT:    vcvtsd2si %xmm0, %ecx
+; X86-AVX-NEXT:    vmovd %ecx, %xmm1
+; X86-AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX-NEXT:    vcvtsd2si %xmm0, %eax
+; X86-AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; X86-AVX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X86-AVX-NEXT:    vcvtsd2si %xmm0, %eax
+; X86-AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; X86-AVX-NEXT:    vzeroupper
+; X86-AVX-NEXT:    retl
+;
+; X64-AVX-i32-LABEL: lrint_v4f64:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
+; X64-AVX-i32-NEXT:    vcvtsd2si %xmm1, %eax
+; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %ecx
+; X64-AVX-i32-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX-i32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX-i32-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX-i32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; X64-AVX-i32-NEXT:    vzeroupper
+; X64-AVX-i32-NEXT:    retq
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
+  ret <4 x iXLen> %a
 }
-declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>)
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
 
-define <8 x i64> @lrint_v8f64(<8 x double> %x) {
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; X86-SSE2-LABEL: lrint_v8f64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %ebp
@@ -349,81 +335,40 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 ; X86-SSE2-NEXT:    movl %esp, %ebp
 ; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    andl $-16, %esp
-; X86-SSE2-NEXT:    subl $80, %esp
-; X86-SSE2-NEXT:    movaps 8(%ebp), %xmm3
-; X86-SSE2-NEXT:    movhps %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movhps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movlps %xmm1, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movhps %xmm2, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movlps %xmm2, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movhps %xmm3, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movlps %xmm3, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
-; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm4 = mem[0],zero
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
-; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X86-SSE2-NEXT:    subl $16, %esp
+; X86-SSE2-NEXT:    movapd %xmm0, %xmm3
+; X86-SSE2-NEXT:    movapd 8(%ebp), %xmm4
+; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm5
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm3
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
+; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
+; X86-SSE2-NEXT:    movd %eax, %xmm2
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
-;
-; X64-SSE-LABEL: lrint_v8f64:
-; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm4
-; X64-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X64-SSE-NEXT:    cvtsd2si %xmm0, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm0
-; X64-SSE-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
-; X64-SSE-NEXT:    cvtsd2si %xmm1, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm5
-; X64-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X64-SSE-NEXT:    cvtsd2si %xmm1, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm0
-; X64-SSE-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0]
-; X64-SSE-NEXT:    cvtsd2si %xmm2, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm6
-; X64-SSE-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
-; X64-SSE-NEXT:    cvtsd2si %xmm2, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm0
-; X64-SSE-NEXT:    punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0]
-; X64-SSE-NEXT:    cvtsd2si %xmm3, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm7
-; X64-SSE-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
-; X64-SSE-NEXT:    cvtsd2si %xmm3, %rax
-; X64-SSE-NEXT:    movq %rax, %xmm0
-; X64-SSE-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0]
-; X64-SSE-NEXT:    movdqa %xmm4, %xmm0
-; X64-SSE-NEXT:    movdqa %xmm5, %xmm1
-; X64-SSE-NEXT:    movdqa %xmm6, %xmm2
-; X64-SSE-NEXT:    movdqa %xmm7, %xmm3
-; X64-SSE-NEXT:    retq
-  %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x)
-  ret <8 x i64> %a
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
+  ret <8 x iXLen> %a
 }
-declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)

>From a1ed2f7c087e86de9e962c8c5539c658ac328b09 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <Ramkumar.Ramachandra at imgtec.com>
Date: Fri, 3 Nov 2023 17:33:06 +0000
Subject: [PATCH 2/2] CodeGen/X86: fix conflict between AVX and AVX512, re-run
 UTC

---
 llvm/test/CodeGen/X86/vector-lrint.ll | 440 +++++++++++++++++++++++++-
 1 file changed, 434 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 43d12fdeffa9a91..f527a3584f4470f 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefix=X86-AVX
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefix=X86-AVX
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX-i32
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX-i32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefix=X64-AVX-i64
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64-AVX-i64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86-AVX,X86-AVX512
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i32,X64-AVX1-i32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i32,X64-AVX512-i32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i64,X64-AVX1-i64
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i64,X64-AVX512-i64
 
 define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; X86-SSE2-LABEL: lrint_v1f32:
@@ -149,6 +149,42 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 ; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %eax
 ; X64-AVX-i32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
 ; X64-AVX-i32-NEXT:    retq
+;
+; X64-AVX1-i64-LABEL: lrint_v4f32:
+; X64-AVX1-i64:       # %bb.0:
+; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm2, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-i64-NEXT:    retq
+;
+; X64-AVX512-i64-LABEL: lrint_v4f32:
+; X64-AVX512-i64:       # %bb.0:
+; X64-AVX512-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; X64-AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX512-i64-NEXT:    retq
   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
   ret <4 x iXLen> %a
 }
@@ -191,6 +227,186 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 ; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; X86-SSE2-NEXT:    movdqa %xmm2, %xmm1
 ; X86-SSE2-NEXT:    retl
+;
+; X86-AVX1-LABEL: lrint_v8f32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; X86-AVX1-NEXT:    vcvtss2si %xmm2, %eax
+; X86-AVX1-NEXT:    vcvtss2si %xmm1, %ecx
+; X86-AVX1-NEXT:    vmovd %ecx, %xmm2
+; X86-AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
+; X86-AVX1-NEXT:    vcvtss2si %xmm3, %eax
+; X86-AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
+; X86-AVX1-NEXT:    vcvtss2si %xmm1, %eax
+; X86-AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
+; X86-AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-AVX1-NEXT:    vcvtss2si %xmm2, %eax
+; X86-AVX1-NEXT:    vcvtss2si %xmm0, %ecx
+; X86-AVX1-NEXT:    vmovd %ecx, %xmm2
+; X86-AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
+; X86-AVX1-NEXT:    vcvtss2si %xmm3, %eax
+; X86-AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X86-AVX1-NEXT:    vcvtss2si %xmm0, %eax
+; X86-AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX512-LABEL: lrint_v8f32:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX512-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; X86-AVX512-NEXT:    vcvtss2si %xmm2, %eax
+; X86-AVX512-NEXT:    vcvtss2si %xmm1, %ecx
+; X86-AVX512-NEXT:    vmovd %ecx, %xmm2
+; X86-AVX512-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
+; X86-AVX512-NEXT:    vcvtss2si %xmm3, %eax
+; X86-AVX512-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X86-AVX512-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
+; X86-AVX512-NEXT:    vcvtss2si %xmm1, %eax
+; X86-AVX512-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
+; X86-AVX512-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-AVX512-NEXT:    vcvtss2si %xmm2, %eax
+; X86-AVX512-NEXT:    vcvtss2si %xmm0, %ecx
+; X86-AVX512-NEXT:    vmovd %ecx, %xmm2
+; X86-AVX512-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
+; X86-AVX512-NEXT:    vcvtss2si %xmm3, %eax
+; X86-AVX512-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X86-AVX512-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X86-AVX512-NEXT:    vcvtss2si %xmm0, %eax
+; X86-AVX512-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; X86-AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX1-i32-LABEL: lrint_v8f32:
+; X64-AVX1-i32:       # %bb.0:
+; X64-AVX1-i32-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X64-AVX1-i32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; X64-AVX1-i32-NEXT:    vcvtss2si %xmm2, %eax
+; X64-AVX1-i32-NEXT:    vcvtss2si %xmm1, %ecx
+; X64-AVX1-i32-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX1-i32-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X64-AVX1-i32-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
+; X64-AVX1-i32-NEXT:    vcvtss2si %xmm3, %eax
+; X64-AVX1-i32-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X64-AVX1-i32-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
+; X64-AVX1-i32-NEXT:    vcvtss2si %xmm1, %eax
+; X64-AVX1-i32-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
+; X64-AVX1-i32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X64-AVX1-i32-NEXT:    vcvtss2si %xmm2, %eax
+; X64-AVX1-i32-NEXT:    vcvtss2si %xmm0, %ecx
+; X64-AVX1-i32-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX1-i32-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X64-AVX1-i32-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
+; X64-AVX1-i32-NEXT:    vcvtss2si %xmm3, %eax
+; X64-AVX1-i32-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X64-AVX1-i32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X64-AVX1-i32-NEXT:    vcvtss2si %xmm0, %eax
+; X64-AVX1-i32-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; X64-AVX1-i32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-i32-NEXT:    retq
+;
+; X64-AVX512-i32-LABEL: lrint_v8f32:
+; X64-AVX512-i32:       # %bb.0:
+; X64-AVX512-i32-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X64-AVX512-i32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; X64-AVX512-i32-NEXT:    vcvtss2si %xmm2, %eax
+; X64-AVX512-i32-NEXT:    vcvtss2si %xmm1, %ecx
+; X64-AVX512-i32-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX512-i32-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X64-AVX512-i32-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
+; X64-AVX512-i32-NEXT:    vcvtss2si %xmm3, %eax
+; X64-AVX512-i32-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X64-AVX512-i32-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
+; X64-AVX512-i32-NEXT:    vcvtss2si %xmm1, %eax
+; X64-AVX512-i32-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
+; X64-AVX512-i32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X64-AVX512-i32-NEXT:    vcvtss2si %xmm2, %eax
+; X64-AVX512-i32-NEXT:    vcvtss2si %xmm0, %ecx
+; X64-AVX512-i32-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX512-i32-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X64-AVX512-i32-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
+; X64-AVX512-i32-NEXT:    vcvtss2si %xmm3, %eax
+; X64-AVX512-i32-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X64-AVX512-i32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; X64-AVX512-i32-NEXT:    vcvtss2si %xmm0, %eax
+; X64-AVX512-i32-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; X64-AVX512-i32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX512-i32-NEXT:    retq
+;
+; X64-AVX1-i64-LABEL: lrint_v8f32:
+; X64-AVX1-i64:       # %bb.0:
+; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm2, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm3, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm2
+; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm3, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; X64-AVX1-i64-NEXT:    vmovaps %ymm2, %ymm0
+; X64-AVX1-i64-NEXT:    retq
+;
+; X64-AVX512-i64-LABEL: lrint_v8f32:
+; X64-AVX512-i64:       # %bb.0:
+; X64-AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X64-AVX512-i64-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm3, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; X64-AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
+; X64-AVX512-i64-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm3, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; X64-AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X64-AVX512-i64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X64-AVX512-i64-NEXT:    retq
   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
   ret <8 x iXLen> %a
 }
@@ -321,6 +537,42 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; X64-AVX-i32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
 ; X64-AVX-i32-NEXT:    vzeroupper
 ; X64-AVX-i32-NEXT:    retq
+;
+; X64-AVX1-i64-LABEL: lrint_v4f64:
+; X64-AVX1-i64:       # %bb.0:
+; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-i64-NEXT:    retq
+;
+; X64-AVX512-i64-LABEL: lrint_v4f64:
+; X64-AVX512-i64:       # %bb.0:
+; X64-AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; X64-AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX512-i64-NEXT:    retq
   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
   ret <4 x iXLen> %a
 }
@@ -368,6 +620,182 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
+;
+; X86-AVX1-LABEL: lrint_v8f64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm1[1,0]
+; X86-AVX1-NEXT:    vcvtsd2si %xmm2, %eax
+; X86-AVX1-NEXT:    vcvtsd2si %xmm1, %ecx
+; X86-AVX1-NEXT:    vmovd %ecx, %xmm2
+; X86-AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; X86-AVX1-NEXT:    vcvtsd2si %xmm1, %eax
+; X86-AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
+; X86-AVX1-NEXT:    vcvtsd2si %xmm1, %eax
+; X86-AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X86-AVX1-NEXT:    vcvtsd2si %xmm2, %eax
+; X86-AVX1-NEXT:    vcvtsd2si %xmm0, %ecx
+; X86-AVX1-NEXT:    vmovd %ecx, %xmm2
+; X86-AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vcvtsd2si %xmm0, %eax
+; X86-AVX1-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X86-AVX1-NEXT:    vcvtsd2si %xmm0, %eax
+; X86-AVX1-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX512-LABEL: lrint_v8f64:
+; X86-AVX512:       # %bb.0:
+; X86-AVX512-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm1[1,0]
+; X86-AVX512-NEXT:    vcvtsd2si %xmm2, %eax
+; X86-AVX512-NEXT:    vcvtsd2si %xmm1, %ecx
+; X86-AVX512-NEXT:    vmovd %ecx, %xmm1
+; X86-AVX512-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
+; X86-AVX512-NEXT:    vcvtsd2si %xmm2, %eax
+; X86-AVX512-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
+; X86-AVX512-NEXT:    vcvtsd2si %xmm2, %eax
+; X86-AVX512-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X86-AVX512-NEXT:    vcvtsd2si %xmm2, %eax
+; X86-AVX512-NEXT:    vcvtsd2si %xmm0, %ecx
+; X86-AVX512-NEXT:    vmovd %ecx, %xmm2
+; X86-AVX512-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X86-AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX512-NEXT:    vcvtsd2si %xmm0, %eax
+; X86-AVX512-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X86-AVX512-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X86-AVX512-NEXT:    vcvtsd2si %xmm0, %eax
+; X86-AVX512-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; X86-AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX512-NEXT:    retl
+;
+; X64-AVX1-i32-LABEL: lrint_v8f64:
+; X64-AVX1-i32:       # %bb.0:
+; X64-AVX1-i32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm1[1,0]
+; X64-AVX1-i32-NEXT:    vcvtsd2si %xmm2, %eax
+; X64-AVX1-i32-NEXT:    vcvtsd2si %xmm1, %ecx
+; X64-AVX1-i32-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX1-i32-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X64-AVX1-i32-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; X64-AVX1-i32-NEXT:    vcvtsd2si %xmm1, %eax
+; X64-AVX1-i32-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X64-AVX1-i32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
+; X64-AVX1-i32-NEXT:    vcvtsd2si %xmm1, %eax
+; X64-AVX1-i32-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
+; X64-AVX1-i32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX1-i32-NEXT:    vcvtsd2si %xmm2, %eax
+; X64-AVX1-i32-NEXT:    vcvtsd2si %xmm0, %ecx
+; X64-AVX1-i32-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX1-i32-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X64-AVX1-i32-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX1-i32-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX1-i32-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X64-AVX1-i32-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX1-i32-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX1-i32-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; X64-AVX1-i32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-i32-NEXT:    retq
+;
+; X64-AVX512-i32-LABEL: lrint_v8f64:
+; X64-AVX512-i32:       # %bb.0:
+; X64-AVX512-i32-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
+; X64-AVX512-i32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm1[1,0]
+; X64-AVX512-i32-NEXT:    vcvtsd2si %xmm2, %eax
+; X64-AVX512-i32-NEXT:    vcvtsd2si %xmm1, %ecx
+; X64-AVX512-i32-NEXT:    vmovd %ecx, %xmm1
+; X64-AVX512-i32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X64-AVX512-i32-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
+; X64-AVX512-i32-NEXT:    vcvtsd2si %xmm2, %eax
+; X64-AVX512-i32-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; X64-AVX512-i32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
+; X64-AVX512-i32-NEXT:    vcvtsd2si %xmm2, %eax
+; X64-AVX512-i32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
+; X64-AVX512-i32-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
+; X64-AVX512-i32-NEXT:    vcvtsd2si %xmm2, %eax
+; X64-AVX512-i32-NEXT:    vcvtsd2si %xmm0, %ecx
+; X64-AVX512-i32-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX512-i32-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
+; X64-AVX512-i32-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX512-i32-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX512-i32-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
+; X64-AVX512-i32-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX512-i32-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX512-i32-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
+; X64-AVX512-i32-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX512-i32-NEXT:    retq
+;
+; X64-AVX1-i64-LABEL: lrint_v8f64:
+; X64-AVX1-i64:       # %bb.0:
+; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
+; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; X64-AVX1-i64-NEXT:    retq
+;
+; X64-AVX512-i64-LABEL: lrint_v8f64:
+; X64-AVX512-i64:       # %bb.0:
+; X64-AVX512-i64-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm1
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-AVX512-i64-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; X64-AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
+; X64-AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm2
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm3
+; X64-AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
+; X64-AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
+; X64-AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; X64-AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; X64-AVX512-i64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X64-AVX512-i64-NEXT:    retq
   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
   ret <8 x iXLen> %a
 }



More information about the llvm-commits mailing list