[llvm] [SelectionDAG] Move VSelect sign pattern check from AArch64 to general SelectionDAG (PR #151840)

via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 3 10:01:25 PDT 2025


https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/151840

From 1a4f567701d700d961b9eff8185af9123fcc7404 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sun, 3 Aug 2025 13:00:50 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)

---
 llvm/test/CodeGen/X86/cmp-select-sign.ll | 1029 ++++++++++++++++++++++
 1 file changed, 1029 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/cmp-select-sign.ll

diff --git a/llvm/test/CodeGen/X86/cmp-select-sign.ll b/llvm/test/CodeGen/X86/cmp-select-sign.ll
new file mode 100644
index 0000000000000..18adc66f9a2ff
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmp-select-sign.ll
@@ -0,0 +1,1029 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI,CHECK-NOBMI-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-BMI2-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx  | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX12,CHECK-AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX12,CHECK-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX512
+
+define i3 @sign_i3(i3 %a) {
+; CHECK-LABEL: sign_i3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andb $4, %dil
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negb %dil
+; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    orb $1, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i3 %a, -1
+  %res = select i1 %c, i3 1, i3 -1
+  ret i3 %res
+}
+
+define i4 @sign_i4(i4 %a) {
+; CHECK-LABEL: sign_i4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andb $8, %dil
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    negb %dil
+; CHECK-NEXT:    sbbl %eax, %eax
+; CHECK-NEXT:    orb $1, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i4 %a, -1
+  %res = select i1 %c, i4 1, i4 -1
+  ret i4 %res
+}
+
+define i8 @sign_i8(i8 %a) {
+; CHECK-LABEL: sign_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarb $7, %al
+; CHECK-NEXT:    orb $1, %al
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i8 %a, -1
+  %res = select i1 %c, i8 1, i8 -1
+  ret i8 %res
+}
+
+define i16 @sign_i16(i16 %a) {
+; CHECK-LABEL: sign_i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movswl %di, %eax
+; CHECK-NEXT:    sarl $15, %eax
+; CHECK-NEXT:    orl $1, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i16 %a, -1
+  %res = select i1 %c, i16 1, i16 -1
+  ret i16 %res
+}
+
+define i32 @sign_i32(i32 %a) {
+; CHECK-LABEL: sign_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarl $31, %eax
+; CHECK-NEXT:    orl $1, %eax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i32 %a, -1
+  %res = select i1 %c, i32 1, i32 -1
+  ret i32 %res
+}
+
+define i64 @sign_i64(i64 %a) {
+; CHECK-LABEL: sign_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    sarq $63, %rax
+; CHECK-NEXT:    orq $1, %rax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i64 %a, -1
+  %res = select i1 %c, i64 1, i64 -1
+  ret i64 %res
+}
+
+
+define i64 @not_sign_i64(i64 %a) {
+; CHECK-LABEL: not_sign_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    leaq -1(%rax,%rax), %rax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i64 %a, 0
+  %res = select i1 %c, i64 1, i64 -1
+  ret i64 %res
+}
+
+define i64 @not_sign_i64_2(i64 %a) {
+; CHECK-LABEL: not_sign_i64_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    sarq $63, %rax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i64 %a, -1
+  %res = select i1 %c, i64 0, i64 -1
+  ret i64 %res
+}
+
+define i64 @not_sign_i64_3(i64 %a) {
+; CHECK-LABEL: not_sign_i64_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    notq %rax
+; CHECK-NEXT:    shrq $63, %rax
+; CHECK-NEXT:    retq
+  %c = icmp sgt i64 %a, -1
+  %res = select i1 %c, i64 1, i64 0
+  ret i64 %res
+}
+
+define i64 @not_sign_i64_4(i64 %a) {
+; CHECK-LABEL: not_sign_i64_4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq $-1, %rax
+; CHECK-NEXT:    retq
+  %c = icmp ugt i64 %a, -1
+  %res = select i1 %c, i64 1, i64 -1
+  ret i64 %res
+}
+
+define <7 x i8> @sign_7xi8(<7 x i8> %a) {
+; CHECK-NOBMI-LABEL: sign_7xi8:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movq %rdi, %rax
+; CHECK-NOBMI-NEXT:    movl {{[0-9]+}}(%rsp), %edi
+; CHECK-NOBMI-NEXT:    shll $8, %edi
+; CHECK-NOBMI-NEXT:    movzbl %r9b, %r9d
+; CHECK-NOBMI-NEXT:    orl %edi, %r9d
+; CHECK-NOBMI-NEXT:    movzbl %sil, %esi
+; CHECK-NOBMI-NEXT:    movzbl %dl, %edx
+; CHECK-NOBMI-NEXT:    shll $8, %edx
+; CHECK-NOBMI-NEXT:    orl %esi, %edx
+; CHECK-NOBMI-NEXT:    movzbl %cl, %ecx
+; CHECK-NOBMI-NEXT:    shll $16, %ecx
+; CHECK-NOBMI-NEXT:    orl %edx, %ecx
+; CHECK-NOBMI-NEXT:    shll $24, %r8d
+; CHECK-NOBMI-NEXT:    orl %ecx, %r8d
+; CHECK-NOBMI-NEXT:    movd %r8d, %xmm0
+; CHECK-NOBMI-NEXT:    pinsrw $2, %r9d, %xmm0
+; CHECK-NOBMI-NEXT:    pinsrw $3, {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NOBMI-NEXT:    pcmpgtb %xmm0, %xmm1
+; CHECK-NOBMI-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NOBMI-NEXT:    movd %xmm1, (%rax)
+; CHECK-NOBMI-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NOBMI-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; CHECK-NOBMI-NEXT:    movb %cl, 6(%rax)
+; CHECK-NOBMI-NEXT:    pextrw $2, %xmm1, %ecx
+; CHECK-NOBMI-NEXT:    movw %cx, 4(%rax)
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_7xi8:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    movq %rdi, %rax
+; CHECK-BMI2-SSE2-NEXT:    movl {{[0-9]+}}(%rsp), %edi
+; CHECK-BMI2-SSE2-NEXT:    shll $8, %edi
+; CHECK-BMI2-SSE2-NEXT:    movzbl %r9b, %r9d
+; CHECK-BMI2-SSE2-NEXT:    orl %edi, %r9d
+; CHECK-BMI2-SSE2-NEXT:    movzbl %sil, %esi
+; CHECK-BMI2-SSE2-NEXT:    movzbl %dl, %edx
+; CHECK-BMI2-SSE2-NEXT:    shll $8, %edx
+; CHECK-BMI2-SSE2-NEXT:    orl %esi, %edx
+; CHECK-BMI2-SSE2-NEXT:    movzbl %cl, %ecx
+; CHECK-BMI2-SSE2-NEXT:    shll $16, %ecx
+; CHECK-BMI2-SSE2-NEXT:    orl %edx, %ecx
+; CHECK-BMI2-SSE2-NEXT:    shll $24, %r8d
+; CHECK-BMI2-SSE2-NEXT:    orl %ecx, %r8d
+; CHECK-BMI2-SSE2-NEXT:    movd %r8d, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    pinsrw $2, %r9d, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    pinsrw $3, {{[0-9]+}}(%rsp), %xmm0
+; CHECK-BMI2-SSE2-NEXT:    pxor %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-BMI2-SSE2-NEXT:    movd %xmm1, (%rax)
+; CHECK-BMI2-SSE2-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-BMI2-SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; CHECK-BMI2-SSE2-NEXT:    movb %cl, 6(%rax)
+; CHECK-BMI2-SSE2-NEXT:    pextrw $2, %xmm1, %ecx
+; CHECK-BMI2-SSE2-NEXT:    movw %cx, 4(%rax)
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX12-LABEL: sign_7xi8:
+; CHECK-AVX12:       # %bb.0:
+; CHECK-AVX12-NEXT:    movq %rdi, %rax
+; CHECK-AVX12-NEXT:    vmovd %esi, %xmm0
+; CHECK-AVX12-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    vpinsrb $3, %r8d, %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    vpinsrb $4, %r9d, %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX12-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX12-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    vpextrb $6, %xmm0, 6(%rdi)
+; CHECK-AVX12-NEXT:    vpextrw $2, %xmm0, 4(%rdi)
+; CHECK-AVX12-NEXT:    vmovd %xmm0, (%rdi)
+; CHECK-AVX12-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: sign_7xi8:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    movq %rdi, %rax
+; CHECK-AVX512-NEXT:    vmovd %esi, %xmm0
+; CHECK-AVX512-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpinsrb $3, %r8d, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpinsrb $4, %r9d, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT:    vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpextrb $6, %xmm0, 6(%rdi)
+; CHECK-AVX512-NEXT:    vpextrw $2, %xmm0, 4(%rdi)
+; CHECK-AVX512-NEXT:    vmovd %xmm0, (%rdi)
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <7 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %res = select <7 x i1> %c, <7 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <7 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <7 x i8> %res
+}
+
+define <8 x i8> @sign_8xi8(<8 x i8> %a) {
+; CHECK-NOBMI-LABEL: sign_8xi8:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NOBMI-NEXT:    pcmpgtb %xmm0, %xmm1
+; CHECK-NOBMI-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_8xi8:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    pxor %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-BMI2-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX12-LABEL: sign_8xi8:
+; CHECK-AVX12:       # %bb.0:
+; CHECK-AVX12-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX12-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX12-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: sign_8xi8:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT:    vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %res = select <8 x i1> %c, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <8 x i8> %res
+}
+
+define <16 x i8> @sign_16xi8(<16 x i8> %a) {
+; CHECK-NOBMI-LABEL: sign_16xi8:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NOBMI-NEXT:    pcmpgtb %xmm0, %xmm1
+; CHECK-NOBMI-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_16xi8:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    pxor %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-BMI2-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX12-LABEL: sign_16xi8:
+; CHECK-AVX12:       # %bb.0:
+; CHECK-AVX12-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX12-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX12-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX12-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: sign_16xi8:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT:    vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %res = select <16 x i1> %c, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <16 x i8> %res
+}
+
+define <3 x i32> @sign_3xi32(<3 x i32> %a) {
+; CHECK-NOBMI-LABEL: sign_3xi32:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    psrad $31, %xmm0
+; CHECK-NOBMI-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_3xi32:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    psrad $31, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: sign_3xi32:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: sign_3xi32:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: sign_3xi32:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; CHECK-AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 {%k1} = [1,1,1,1]
+; CHECK-AVX512-NEXT:    vmovdqa %xmm1, %xmm0
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1>
+  %res = select <3 x i1> %c, <3 x i32> <i32 1, i32 1, i32 1>, <3 x i32> <i32 -1, i32 -1, i32 -1>
+  ret <3 x i32> %res
+}
+
+define <4 x i32> @sign_4xi32(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: sign_4xi32:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    psrad $31, %xmm0
+; CHECK-NOBMI-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_4xi32:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    psrad $31, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: sign_4xi32:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: sign_4xi32:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: sign_4xi32:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: sign_4xi32_multi_use:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    subq $24, %rsp
+; CHECK-NOBMI-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-NOBMI-NEXT:    psrad $31, %xmm2
+; CHECK-NOBMI-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NOBMI-NEXT:    movdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-NOBMI-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT:    callq use_4xi1 at PLT
+; CHECK-NOBMI-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NOBMI-NEXT:    addq $24, %rsp
+; CHECK-NOBMI-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_4xi32_multi_use:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    subq $24, %rsp
+; CHECK-BMI2-SSE2-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-BMI2-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-BMI2-SSE2-NEXT:    psrad $31, %xmm2
+; CHECK-BMI2-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-BMI2-SSE2-NEXT:    movdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-BMI2-SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    callq use_4xi1 at PLT
+; CHECK-BMI2-SSE2-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-BMI2-SSE2-NEXT:    addq $24, %rsp
+; CHECK-BMI2-SSE2-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: sign_4xi32_multi_use:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    subq $24, %rsp
+; CHECK-AVX1-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
+; CHECK-AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; CHECK-AVX1-NEXT:    vmovdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    callq use_4xi1 at PLT
+; CHECK-AVX1-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-AVX1-NEXT:    addq $24, %rsp
+; CHECK-AVX1-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: sign_4xi32_multi_use:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    subq $24, %rsp
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpsrad $31, %xmm0, %xmm2
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1]
+; CHECK-AVX2-NEXT:    vpor %xmm3, %xmm2, %xmm2
+; CHECK-AVX2-NEXT:    vmovdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    callq use_4xi1 at PLT
+; CHECK-AVX2-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-AVX2-NEXT:    addq $24, %rsp
+; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: sign_4xi32_multi_use:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    subq $24, %rsp
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT:    vpsrad $31, %xmm0, %xmm2
+; CHECK-AVX512-NEXT:    vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2
+; CHECK-AVX512-NEXT:    vmovdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    callq use_4xi1 at PLT
+; CHECK-AVX512-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-AVX512-NEXT:    addq $24, %rsp
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  call void @use_4xi1(<4 x i1> %c)
+  ret <4 x i32> %res
+}
+
+; Second icmp operand breaks sign pattern.
+define <4 x i32> @not_sign_4xi32(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: not_sign_4xi32:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: not_sign_4xi32:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: not_sign_4xi32:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: not_sign_4xi32:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 {%k1} = [1,1,1,1]
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <4 x i32> %a, <i32 1, i32 -1, i32 -1, i32 -1>
+  %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %res
+}
+
+; First select operand breaks sign pattern.
+define <4 x i32> @not_sign_4xi32_2(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: not_sign_4xi32_2:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    psrad $31, %xmm0
+; CHECK-NOBMI-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32_2:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    psrad $31, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX-LABEL: not_sign_4xi32_2:
+; CHECK-AVX:       # %bb.0:
+; CHECK-AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX-NEXT:    retq
+  %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 -1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %res
+}
+
+; Second select operand breaks sign pattern.
+define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: not_sign_4xi32_3:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NOBMI-NEXT:    psubd %xmm0, %xmm1
+; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32_3:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-BMI2-SSE2-NEXT:    psubd %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX12-LABEL: not_sign_4xi32_3:
+; CHECK-AVX12:       # %bb.0:
+; CHECK-AVX12-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-AVX12-NEXT:    vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-AVX12-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: not_sign_4xi32_3:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
+; CHECK-AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967295,4294967295,4294967295,1]
+; CHECK-AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 {%k1} = [1,1,1,1]
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 1>
+  ret <4 x i32> %res
+}
+
+; i65 is not legal.
+define <4 x i65> @sign_4xi65(<4 x i65> %a) {
+; CHECK-NOBMI-LABEL: sign_4xi65:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    movq %rdi, %rax
+; CHECK-NOBMI-NEXT:    andl $1, %edx
+; CHECK-NOBMI-NEXT:    negq %rdx
+; CHECK-NOBMI-NEXT:    andl $1, %r8d
+; CHECK-NOBMI-NEXT:    negq %r8
+; CHECK-NOBMI-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NOBMI-NEXT:    andl $1, %edi
+; CHECK-NOBMI-NEXT:    negq %rdi
+; CHECK-NOBMI-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NOBMI-NEXT:    andl $1, %ecx
+; CHECK-NOBMI-NEXT:    negq %rcx
+; CHECK-NOBMI-NEXT:    movq %rcx, %rsi
+; CHECK-NOBMI-NEXT:    orq $1, %rsi
+; CHECK-NOBMI-NEXT:    movq %rdi, %r9
+; CHECK-NOBMI-NEXT:    orq $1, %r9
+; CHECK-NOBMI-NEXT:    movq %r8, %r10
+; CHECK-NOBMI-NEXT:    orq $1, %r10
+; CHECK-NOBMI-NEXT:    movq %rdx, %xmm0
+; CHECK-NOBMI-NEXT:    orq $1, %rdx
+; CHECK-NOBMI-NEXT:    movq %rdx, (%rax)
+; CHECK-NOBMI-NEXT:    movq %rdx, %xmm1
+; CHECK-NOBMI-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NOBMI-NEXT:    movq %xmm0, %rdx
+; CHECK-NOBMI-NEXT:    leaq (%r10,%r10), %r11
+; CHECK-NOBMI-NEXT:    subq %rdx, %r11
+; CHECK-NOBMI-NEXT:    movq %r11, 8(%rax)
+; CHECK-NOBMI-NEXT:    movq %rdi, %xmm0
+; CHECK-NOBMI-NEXT:    movq %r9, %xmm1
+; CHECK-NOBMI-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NOBMI-NEXT:    movd %xmm0, %edx
+; CHECK-NOBMI-NEXT:    andl $1, %edx
+; CHECK-NOBMI-NEXT:    shrq $62, %rdi
+; CHECK-NOBMI-NEXT:    leaq (%rdi,%rdx,4), %rdx
+; CHECK-NOBMI-NEXT:    leaq (%rdx,%rsi,8), %rdx
+; CHECK-NOBMI-NEXT:    movq %rdx, 24(%rax)
+; CHECK-NOBMI-NEXT:    movq %r8, %xmm0
+; CHECK-NOBMI-NEXT:    movq %r10, %xmm1
+; CHECK-NOBMI-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NOBMI-NEXT:    movd %xmm0, %edx
+; CHECK-NOBMI-NEXT:    andl $1, %edx
+; CHECK-NOBMI-NEXT:    shrq $63, %r8
+; CHECK-NOBMI-NEXT:    leaq (%r8,%rdx,2), %rdx
+; CHECK-NOBMI-NEXT:    leaq (%rdx,%r9,4), %rdx
+; CHECK-NOBMI-NEXT:    movq %rdx, 16(%rax)
+; CHECK-NOBMI-NEXT:    movq %rcx, %xmm0
+; CHECK-NOBMI-NEXT:    shrq $61, %rcx
+; CHECK-NOBMI-NEXT:    movq %rsi, %xmm1
+; CHECK-NOBMI-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NOBMI-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NOBMI-NEXT:    movd %xmm0, %edx
+; CHECK-NOBMI-NEXT:    leal (%rcx,%rdx,8), %ecx
+; CHECK-NOBMI-NEXT:    andl $15, %ecx
+; CHECK-NOBMI-NEXT:    movb %cl, 32(%rax)
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_4xi65:
+; CHECK-BMI2-SSE2:       # %bb.0:
+; CHECK-BMI2-SSE2-NEXT:    movq %rdi, %rax
+; CHECK-BMI2-SSE2-NEXT:    andl $1, %edx
+; CHECK-BMI2-SSE2-NEXT:    negq %rdx
+; CHECK-BMI2-SSE2-NEXT:    andl $1, %r8d
+; CHECK-BMI2-SSE2-NEXT:    negq %r8
+; CHECK-BMI2-SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-BMI2-SSE2-NEXT:    andl $1, %edi
+; CHECK-BMI2-SSE2-NEXT:    negq %rdi
+; CHECK-BMI2-SSE2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-BMI2-SSE2-NEXT:    andl $1, %ecx
+; CHECK-BMI2-SSE2-NEXT:    negq %rcx
+; CHECK-BMI2-SSE2-NEXT:    movq %rcx, %rsi
+; CHECK-BMI2-SSE2-NEXT:    orq $1, %rsi
+; CHECK-BMI2-SSE2-NEXT:    movq %rdi, %r9
+; CHECK-BMI2-SSE2-NEXT:    orq $1, %r9
+; CHECK-BMI2-SSE2-NEXT:    movq %r8, %r10
+; CHECK-BMI2-SSE2-NEXT:    orq $1, %r10
+; CHECK-BMI2-SSE2-NEXT:    movq %rdx, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    orq $1, %rdx
+; CHECK-BMI2-SSE2-NEXT:    movq %rdx, (%rax)
+; CHECK-BMI2-SSE2-NEXT:    movq %rdx, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-BMI2-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-BMI2-SSE2-NEXT:    movq %xmm0, %rdx
+; CHECK-BMI2-SSE2-NEXT:    leaq (%r10,%r10), %r11
+; CHECK-BMI2-SSE2-NEXT:    subq %rdx, %r11
+; CHECK-BMI2-SSE2-NEXT:    movq %r11, 8(%rax)
+; CHECK-BMI2-SSE2-NEXT:    movq %rdi, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    movq %r9, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-BMI2-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-BMI2-SSE2-NEXT:    movd %xmm0, %edx
+; CHECK-BMI2-SSE2-NEXT:    andl $1, %edx
+; CHECK-BMI2-SSE2-NEXT:    shrq $62, %rdi
+; CHECK-BMI2-SSE2-NEXT:    leaq (%rdi,%rdx,4), %rdx
+; CHECK-BMI2-SSE2-NEXT:    leaq (%rdx,%rsi,8), %rdx
+; CHECK-BMI2-SSE2-NEXT:    movq %rdx, 24(%rax)
+; CHECK-BMI2-SSE2-NEXT:    movq %r8, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    movq %r10, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-BMI2-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-BMI2-SSE2-NEXT:    movd %xmm0, %edx
+; CHECK-BMI2-SSE2-NEXT:    andl $1, %edx
+; CHECK-BMI2-SSE2-NEXT:    shrq $63, %r8
+; CHECK-BMI2-SSE2-NEXT:    leaq (%r8,%rdx,2), %rdx
+; CHECK-BMI2-SSE2-NEXT:    leaq (%rdx,%r9,4), %rdx
+; CHECK-BMI2-SSE2-NEXT:    movq %rdx, 16(%rax)
+; CHECK-BMI2-SSE2-NEXT:    movq %rcx, %xmm0
+; CHECK-BMI2-SSE2-NEXT:    shrq $61, %rcx
+; CHECK-BMI2-SSE2-NEXT:    movq %rsi, %xmm1
+; CHECK-BMI2-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-BMI2-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-BMI2-SSE2-NEXT:    movd %xmm0, %edx
+; CHECK-BMI2-SSE2-NEXT:    leal (%rcx,%rdx,8), %ecx
+; CHECK-BMI2-SSE2-NEXT:    andl $15, %ecx
+; CHECK-BMI2-SSE2-NEXT:    movb %cl, 32(%rax)
+; CHECK-BMI2-SSE2-NEXT:    retq
+;
+; CHECK-AVX12-LABEL: sign_4xi65:
+; CHECK-AVX12:       # %bb.0:
+; CHECK-AVX12-NEXT:    movq %rdi, %rax
+; CHECK-AVX12-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-AVX12-NEXT:    andl $1, %edi
+; CHECK-AVX12-NEXT:    movq %rdi, %rcx
+; CHECK-AVX12-NEXT:    negq %rcx
+; CHECK-AVX12-NEXT:    andl $1, %r8d
+; CHECK-AVX12-NEXT:    movq %r8, %r9
+; CHECK-AVX12-NEXT:    negq %r9
+; CHECK-AVX12-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; CHECK-AVX12-NEXT:    andl $1, %esi
+; CHECK-AVX12-NEXT:    movq %rsi, %r10
+; CHECK-AVX12-NEXT:    negq %r10
+; CHECK-AVX12-NEXT:    andl $1, %edx
+; CHECK-AVX12-NEXT:    movq %rdx, %r11
+; CHECK-AVX12-NEXT:    negq %r11
+; CHECK-AVX12-NEXT:    orq $1, %r11
+; CHECK-AVX12-NEXT:    movq %r11, (%rax)
+; CHECK-AVX12-NEXT:    addl %r8d, %r8d
+; CHECK-AVX12-NEXT:    subq %r8, %rdx
+; CHECK-AVX12-NEXT:    orq $2, %rdx
+; CHECK-AVX12-NEXT:    movq %rdx, 8(%rax)
+; CHECK-AVX12-NEXT:    movq %r10, %rdx
+; CHECK-AVX12-NEXT:    shrq $62, %rdx
+; CHECK-AVX12-NEXT:    andl $1, %r10d
+; CHECK-AVX12-NEXT:    leaq (%rdx,%r10,4), %rdx
+; CHECK-AVX12-NEXT:    shll $3, %edi
+; CHECK-AVX12-NEXT:    subq %rdi, %rdx
+; CHECK-AVX12-NEXT:    orq $8, %rdx
+; CHECK-AVX12-NEXT:    movq %rdx, 24(%rax)
+; CHECK-AVX12-NEXT:    movq %r9, %rdx
+; CHECK-AVX12-NEXT:    shrq $63, %rdx
+; CHECK-AVX12-NEXT:    andl $1, %r9d
+; CHECK-AVX12-NEXT:    leaq (%rdx,%r9,2), %rdx
+; CHECK-AVX12-NEXT:    shll $2, %esi
+; CHECK-AVX12-NEXT:    subq %rsi, %rdx
+; CHECK-AVX12-NEXT:    orq $4, %rdx
+; CHECK-AVX12-NEXT:    movq %rdx, 16(%rax)
+; CHECK-AVX12-NEXT:    movq %rcx, %rdx
+; CHECK-AVX12-NEXT:    shrq $61, %rdx
+; CHECK-AVX12-NEXT:    leal (%rdx,%rcx,8), %ecx
+; CHECK-AVX12-NEXT:    andl $15, %ecx
+; CHECK-AVX12-NEXT:    movb %cl, 32(%rax)
+; CHECK-AVX12-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: sign_4xi65:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    pushq %rbx
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-AVX512-NEXT:    .cfi_offset %rbx, -16
+; CHECK-AVX512-NEXT:    movq %rdi, %rax
+; CHECK-AVX512-NEXT:    andl $1, %edx
+; CHECK-AVX512-NEXT:    negq %rdx
+; CHECK-AVX512-NEXT:    andl $1, %r8d
+; CHECK-AVX512-NEXT:    negq %r8
+; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX512-NEXT:    andl $1, %ecx
+; CHECK-AVX512-NEXT:    negq %rcx
+; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; CHECK-AVX512-NEXT:    andl $1, %esi
+; CHECK-AVX512-NEXT:    negq %rsi
+; CHECK-AVX512-NEXT:    movl $1, %r9d
+; CHECK-AVX512-NEXT:    movq $-1, %r10
+; CHECK-AVX512-NEXT:    movq $-1, %rdi
+; CHECK-AVX512-NEXT:    cmovnsq %r9, %rdi
+; CHECK-AVX512-NEXT:    testq %rcx, %rcx
+; CHECK-AVX512-NEXT:    movq $-1, %r11
+; CHECK-AVX512-NEXT:    cmovnsq %r9, %r11
+; CHECK-AVX512-NEXT:    testq %r8, %r8
+; CHECK-AVX512-NEXT:    movq $-1, %rbx
+; CHECK-AVX512-NEXT:    cmovnsq %r9, %rbx
+; CHECK-AVX512-NEXT:    testq %rdx, %rdx
+; CHECK-AVX512-NEXT:    cmovnsq %r9, %r10
+; CHECK-AVX512-NEXT:    shrq $63, %rsi
+; CHECK-AVX512-NEXT:    shrq $63, %rcx
+; CHECK-AVX512-NEXT:    shrq $63, %r8
+; CHECK-AVX512-NEXT:    movq %r10, (%rax)
+; CHECK-AVX512-NEXT:    shrdq $63, %rbx, %rdx
+; CHECK-AVX512-NEXT:    movq %rdx, 8(%rax)
+; CHECK-AVX512-NEXT:    shldq $1, %rbx, %r8
+; CHECK-AVX512-NEXT:    leaq (%r8,%r11,4), %rdx
+; CHECK-AVX512-NEXT:    movq %rdx, 16(%rax)
+; CHECK-AVX512-NEXT:    shrq $62, %r11
+; CHECK-AVX512-NEXT:    leaq (%r11,%rcx,4), %rcx
+; CHECK-AVX512-NEXT:    leaq (%rcx,%rdi,8), %rcx
+; CHECK-AVX512-NEXT:    movq %rcx, 24(%rax)
+; CHECK-AVX512-NEXT:    shrq $61, %rdi
+; CHECK-AVX512-NEXT:    leal (%rdi,%rsi,8), %ecx
+; CHECK-AVX512-NEXT:    movb %cl, 32(%rax)
+; CHECK-AVX512-NEXT:    popq %rbx
+; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-AVX512-NEXT:    retq
+  %c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1>
+  %res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65> <i65 -1, i65 -1, i65 -1, i65 -1>
+  ret <4 x i65> %res
+}
+
+define i32 @or_neg(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orl $1, %edi
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    retq
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp sgt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ugt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orl $1, %edi
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    retq
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp ugt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_no_smin(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    retq
+  %4 = sub i32 0, %x
+  %5 = icmp sgt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_ult_no_zero(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult_no_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    retq
+  %4 = sub i32 0, %x
+  %5 = icmp ult i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin_but_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    cmovgl %edi, %ecx
+; CHECK-NEXT:    negl %ecx
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %ecx
+; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    retq
+  %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+  %4 = sub i32 0, %3
+  %5 = icmp sgt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_slt_zero_but_no_smin(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_slt_zero_but_no_smin:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $9, %edi
+; CHECK-NEXT:    movl $9, %ecx
+; CHECK-NEXT:    cmovbl %edi, %ecx
+; CHECK-NEXT:    negl %ecx
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %ecx
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    retq
+  %3 = call i32 @llvm.umin.i32(i32 %x, i32 9)
+  %4 = sub i32 0, %3
+  %5 = icmp ugt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orl $1, %edi
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    setge %al
+; CHECK-NEXT:    retq
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp sge i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg3(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orl $1, %edi
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    setl %al
+; CHECK-NEXT:    retq
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp slt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg4(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orl $1, %edi
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    setle %al
+; CHECK-NEXT:    retq
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp sle i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_ult(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orl $1, %edi
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    retq
+  %3 = or i32 %x, 1
+  %4 = sub i32 0, %3
+  %5 = icmp ugt i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_no_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    setge %al
+; CHECK-NEXT:    retq
+  %4 = sub i32 0, %x
+  %5 = icmp sge i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_ult_no_zero2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult_no_zero2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    negl %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    retq
+  %4 = sub i32 0, %x
+  %5 = icmp ult i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_no_smin_but_zero2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin_but_zero2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    cmovgl %edi, %ecx
+; CHECK-NEXT:    negl %ecx
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %ecx
+; CHECK-NEXT:    setle %al
+; CHECK-NEXT:    retq
+  %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+  %4 = sub i32 0, %3
+  %5 = icmp sle i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+define i32 @or_neg_slt_zero_but_no_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_slt_zero_but_no_smin2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $9, %edi
+; CHECK-NEXT:    movl $9, %ecx
+; CHECK-NEXT:    cmovbl %edi, %ecx
+; CHECK-NEXT:    negl %ecx
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl %esi, %ecx
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    retq
+  %3 = call i32 @llvm.umin.i32(i32 %x, i32 9)
+  %4 = sub i32 0, %3
+  %5 = icmp uge i32 %4, %y
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+}
+
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+declare void @use_4xi1(<4 x i1>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-BMI2: {{.*}}
+; CHECK-NOBMI-SSE2: {{.*}}

From adfea58ac082b34e6abee15e29a78ec0e1a7b25f Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sun, 3 Aug 2025 13:01:08 -0400
Subject: [PATCH 2/2] [SelectionDAG] Move VSelect sign pattern check from
 AArch64 to general SelectionDAG

The check for this pattern already exists in combineVSelectWithAllOnesOrZeros, but it bails out, presumably so that AArch64 can perform the transform itself. There is no reason the logic cannot live in SelectionDAG directly, so move it there and drop the AArch64-specific copy.
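
For illustration, the combine performs the following rewrite, shown here as an
IR-level sketch (the combine itself operates on the equivalent SelectionDAG
nodes; %a, %c, %sign, and %res are placeholder names):

  ; before: splat(1)/splat(-1) select on a signed compare against all-ones
  %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>

  ; after: ashr by N-1 gives 0 for non-negative lanes and -1 for negative
  ; lanes; OR with the splat(1) true value then yields 1 or -1 per lane
  %sign = ashr <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
  %res = or <4 x i32> %sign, <i32 1, i32 1, i32 1, i32 1>

The transform is guarded by shouldAvoidTransformToShift, so targets where the
shift would be more expensive keep the select form.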
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  9 ++-
 .../Target/AArch64/AArch64ISelLowering.cpp    | 31 ---------
 llvm/test/CodeGen/X86/cmp-select-sign.ll      | 69 +++++++++----------
 3 files changed, 40 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 11e869aebe7da..37cc7b02ed9b0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13142,8 +13142,13 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
       ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
       TValAPInt.isOne() &&
       ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
-      ISD::isConstantSplatVectorAllOnes(FVal.getNode())) {
-    return SDValue();
+      ISD::isConstantSplatVectorAllOnes(FVal.getNode()) &&
+      !TLI.shouldAvoidTransformToShift(VT, VT.getScalarSizeInBits() - 1)) {
+    SDValue LHS = Cond.getOperand(0);
+    EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+    SDValue ShiftC = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, ShiftVT);
+    SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, ShiftC);
+    return DAG.getNode(ISD::OR, DL, VT, Shift, TVal);
   }
 
   // To use the condition operand as a bitwise mask, it must have elements that
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2b6ea86ee1af5..cb0964dcbf546 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25887,38 +25887,7 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
     }
   }
 
-  // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
-  // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
-  // supported types.
   SDValue SetCC = N->getOperand(0);
-  if (SetCC.getOpcode() == ISD::SETCC &&
-      SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
-    SDValue CmpLHS = SetCC.getOperand(0);
-    EVT VT = CmpLHS.getValueType();
-    SDNode *CmpRHS = SetCC.getOperand(1).getNode();
-    SDNode *SplatLHS = N->getOperand(1).getNode();
-    SDNode *SplatRHS = N->getOperand(2).getNode();
-    APInt SplatLHSVal;
-    if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
-        VT.isSimple() &&
-        is_contained(ArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
-                               MVT::v2i32, MVT::v4i32, MVT::v2i64}),
-                     VT.getSimpleVT().SimpleTy) &&
-        ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
-        SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
-        ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
-      unsigned NumElts = VT.getVectorNumElements();
-      SmallVector<SDValue, 8> Ops(
-          NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
-                                   VT.getScalarType()));
-      SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
-
-      auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
-      auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
-      return Or;
-    }
-  }
-
   EVT CmpVT = N0.getOperand(0).getValueType();
   if (N0.getOpcode() != ISD::SETCC ||
       CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
diff --git a/llvm/test/CodeGen/X86/cmp-select-sign.ll b/llvm/test/CodeGen/X86/cmp-select-sign.ll
index 18adc66f9a2ff..889d5c7d780bc 100644
--- a/llvm/test/CodeGen/X86/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/X86/cmp-select-sign.ll
@@ -331,10 +331,8 @@ define <3 x i32> @sign_3xi32(<3 x i32> %a) {
 ;
 ; CHECK-AVX512-LABEL: sign_3xi32:
 ; CHECK-AVX512:       # %bb.0:
-; CHECK-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
-; CHECK-AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 {%k1} = [1,1,1,1]
-; CHECK-AVX512-NEXT:    vmovdqa %xmm1, %xmm0
+; CHECK-AVX512-NEXT:    vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX512-NEXT:    vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; CHECK-AVX512-NEXT:    retq
   %c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1>
   %res = select <3 x i1> %c, <3 x i32> <i32 1, i32 1, i32 1>, <3 x i32> <i32 -1, i32 -1, i32 -1>
@@ -747,41 +745,40 @@ define <4 x i65> @sign_4xi65(<4 x i65> %a) {
 ; CHECK-AVX512-NEXT:    movq %rdi, %rax
 ; CHECK-AVX512-NEXT:    andl $1, %edx
 ; CHECK-AVX512-NEXT:    negq %rdx
-; CHECK-AVX512-NEXT:    andl $1, %r8d
-; CHECK-AVX512-NEXT:    negq %r8
 ; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-AVX512-NEXT:    andl $1, %ecx
-; CHECK-AVX512-NEXT:    negq %rcx
-; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
-; CHECK-AVX512-NEXT:    andl $1, %esi
+; CHECK-AVX512-NEXT:    movq %rcx, %r9
+; CHECK-AVX512-NEXT:    negq %r9
+; CHECK-AVX512-NEXT:    andl $1, %r8d
+; CHECK-AVX512-NEXT:    negq %r8
+; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; CHECK-AVX512-NEXT:    andl $1, %r11d
+; CHECK-AVX512-NEXT:    movq %r11, %rsi
 ; CHECK-AVX512-NEXT:    negq %rsi
-; CHECK-AVX512-NEXT:    movl $1, %r9d
-; CHECK-AVX512-NEXT:    movq $-1, %r10
-; CHECK-AVX512-NEXT:    movq $-1, %rdi
-; CHECK-AVX512-NEXT:    cmovnsq %r9, %rdi
-; CHECK-AVX512-NEXT:    testq %rcx, %rcx
-; CHECK-AVX512-NEXT:    movq $-1, %r11
-; CHECK-AVX512-NEXT:    cmovnsq %r9, %r11
-; CHECK-AVX512-NEXT:    testq %r8, %r8
-; CHECK-AVX512-NEXT:    movq $-1, %rbx
-; CHECK-AVX512-NEXT:    cmovnsq %r9, %rbx
-; CHECK-AVX512-NEXT:    testq %rdx, %rdx
-; CHECK-AVX512-NEXT:    cmovnsq %r9, %r10
-; CHECK-AVX512-NEXT:    shrq $63, %rsi
-; CHECK-AVX512-NEXT:    shrq $63, %rcx
-; CHECK-AVX512-NEXT:    shrq $63, %r8
-; CHECK-AVX512-NEXT:    movq %r10, (%rax)
-; CHECK-AVX512-NEXT:    shrdq $63, %rbx, %rdx
-; CHECK-AVX512-NEXT:    movq %rdx, 8(%rax)
-; CHECK-AVX512-NEXT:    shldq $1, %rbx, %r8
-; CHECK-AVX512-NEXT:    leaq (%r8,%r11,4), %rdx
-; CHECK-AVX512-NEXT:    movq %rdx, 16(%rax)
-; CHECK-AVX512-NEXT:    shrq $62, %r11
-; CHECK-AVX512-NEXT:    leaq (%r11,%rcx,4), %rcx
-; CHECK-AVX512-NEXT:    leaq (%rcx,%rdi,8), %rcx
-; CHECK-AVX512-NEXT:    movq %rcx, 24(%rax)
-; CHECK-AVX512-NEXT:    shrq $61, %rdi
-; CHECK-AVX512-NEXT:    leal (%rdi,%rsi,8), %ecx
+; CHECK-AVX512-NEXT:    movq %rsi, %rdi
+; CHECK-AVX512-NEXT:    shrq $63, %rdi
+; CHECK-AVX512-NEXT:    movq %r8, %r10
+; CHECK-AVX512-NEXT:    shrq $63, %r10
+; CHECK-AVX512-NEXT:    movq %r9, %rbx
+; CHECK-AVX512-NEXT:    shrq $63, %rbx
+; CHECK-AVX512-NEXT:    shldq $1, %r8, %r10
+; CHECK-AVX512-NEXT:    orq $1, %r8
+; CHECK-AVX512-NEXT:    shldq $1, %rdx, %r8
+; CHECK-AVX512-NEXT:    orq $1, %rdx
+; CHECK-AVX512-NEXT:    movq %rdx, (%rax)
+; CHECK-AVX512-NEXT:    movq %r8, 8(%rax)
+; CHECK-AVX512-NEXT:    shrq $62, %r9
+; CHECK-AVX512-NEXT:    leaq (%r9,%rbx,4), %rdx
+; CHECK-AVX512-NEXT:    shll $3, %r11d
+; CHECK-AVX512-NEXT:    subq %r11, %rdx
+; CHECK-AVX512-NEXT:    orq $8, %rdx
+; CHECK-AVX512-NEXT:    movq %rdx, 24(%rax)
+; CHECK-AVX512-NEXT:    shll $2, %ecx
+; CHECK-AVX512-NEXT:    subq %rcx, %r10
+; CHECK-AVX512-NEXT:    orq $4, %r10
+; CHECK-AVX512-NEXT:    movq %r10, 16(%rax)
+; CHECK-AVX512-NEXT:    shrq $61, %rsi
+; CHECK-AVX512-NEXT:    leal (%rsi,%rdi,8), %ecx
 ; CHECK-AVX512-NEXT:    movb %cl, 32(%rax)
 ; CHECK-AVX512-NEXT:    popq %rbx
 ; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8


