[llvm] [SelectionDAG] Move VSelect sign pattern check from AArch64 to general SelectionDAG (PR #151840)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 3 10:01:25 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/151840
From 1a4f567701d700d961b9eff8185af9123fcc7404 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sun, 3 Aug 2025 13:00:50 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)
---
llvm/test/CodeGen/X86/cmp-select-sign.ll | 1029 ++++++++++++++++++++++
1 file changed, 1029 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/cmp-select-sign.ll
diff --git a/llvm/test/CodeGen/X86/cmp-select-sign.ll b/llvm/test/CodeGen/X86/cmp-select-sign.ll
new file mode 100644
index 0000000000000..18adc66f9a2ff
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmp-select-sign.ll
@@ -0,0 +1,1029 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI,CHECK-NOBMI-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-BMI2-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX12,CHECK-AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX12,CHECK-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX512
+
+define i3 @sign_i3(i3 %a) {
+; CHECK-LABEL: sign_i3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $4, %dil
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negb %dil
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: orb $1, %al
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %c = icmp sgt i3 %a, -1
+ %res = select i1 %c, i3 1, i3 -1
+ ret i3 %res
+}
+
+define i4 @sign_i4(i4 %a) {
+; CHECK-LABEL: sign_i4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $8, %dil
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: negb %dil
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: orb $1, %al
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %c = icmp sgt i4 %a, -1
+ %res = select i1 %c, i4 1, i4 -1
+ ret i4 %res
+}
+
+define i8 @sign_i8(i8 %a) {
+; CHECK-LABEL: sign_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: sarb $7, %al
+; CHECK-NEXT: orb $1, %al
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %c = icmp sgt i8 %a, -1
+ %res = select i1 %c, i8 1, i8 -1
+ ret i8 %res
+}
+
+define i16 @sign_i16(i16 %a) {
+; CHECK-LABEL: sign_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movswl %di, %eax
+; CHECK-NEXT: sarl $15, %eax
+; CHECK-NEXT: orl $1, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+ %c = icmp sgt i16 %a, -1
+ %res = select i1 %c, i16 1, i16 -1
+ ret i16 %res
+}
+
+define i32 @sign_i32(i32 %a) {
+; CHECK-LABEL: sign_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: sarl $31, %eax
+; CHECK-NEXT: orl $1, %eax
+; CHECK-NEXT: retq
+ %c = icmp sgt i32 %a, -1
+ %res = select i1 %c, i32 1, i32 -1
+ ret i32 %res
+}
+
+define i64 @sign_i64(i64 %a) {
+; CHECK-LABEL: sign_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: orq $1, %rax
+; CHECK-NEXT: retq
+ %c = icmp sgt i64 %a, -1
+ %res = select i1 %c, i64 1, i64 -1
+ ret i64 %res
+}
+
+
+define i64 @not_sign_i64(i64 %a) {
+; CHECK-LABEL: not_sign_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: leaq -1(%rax,%rax), %rax
+; CHECK-NEXT: retq
+ %c = icmp sgt i64 %a, 0
+ %res = select i1 %c, i64 1, i64 -1
+ ret i64 %res
+}
+
+define i64 @not_sign_i64_2(i64 %a) {
+; CHECK-LABEL: not_sign_i64_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: retq
+ %c = icmp sgt i64 %a, -1
+ %res = select i1 %c, i64 0, i64 -1
+ ret i64 %res
+}
+
+define i64 @not_sign_i64_3(i64 %a) {
+; CHECK-LABEL: not_sign_i64_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: notq %rax
+; CHECK-NEXT: shrq $63, %rax
+; CHECK-NEXT: retq
+ %c = icmp sgt i64 %a, -1
+ %res = select i1 %c, i64 1, i64 0
+ ret i64 %res
+}
+
+define i64 @not_sign_i64_4(i64 %a) {
+; CHECK-LABEL: not_sign_i64_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq $-1, %rax
+; CHECK-NEXT: retq
+ %c = icmp ugt i64 %a, -1
+ %res = select i1 %c, i64 1, i64 -1
+ ret i64 %res
+}
+
+define <7 x i8> @sign_7xi8(<7 x i8> %a) {
+; CHECK-NOBMI-LABEL: sign_7xi8:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movq %rdi, %rax
+; CHECK-NOBMI-NEXT: movl {{[0-9]+}}(%rsp), %edi
+; CHECK-NOBMI-NEXT: shll $8, %edi
+; CHECK-NOBMI-NEXT: movzbl %r9b, %r9d
+; CHECK-NOBMI-NEXT: orl %edi, %r9d
+; CHECK-NOBMI-NEXT: movzbl %sil, %esi
+; CHECK-NOBMI-NEXT: movzbl %dl, %edx
+; CHECK-NOBMI-NEXT: shll $8, %edx
+; CHECK-NOBMI-NEXT: orl %esi, %edx
+; CHECK-NOBMI-NEXT: movzbl %cl, %ecx
+; CHECK-NOBMI-NEXT: shll $16, %ecx
+; CHECK-NOBMI-NEXT: orl %edx, %ecx
+; CHECK-NOBMI-NEXT: shll $24, %r8d
+; CHECK-NOBMI-NEXT: orl %ecx, %r8d
+; CHECK-NOBMI-NEXT: movd %r8d, %xmm0
+; CHECK-NOBMI-NEXT: pinsrw $2, %r9d, %xmm0
+; CHECK-NOBMI-NEXT: pinsrw $3, {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NOBMI-NEXT: movd %xmm1, (%rax)
+; CHECK-NOBMI-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NOBMI-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; CHECK-NOBMI-NEXT: movb %cl, 6(%rax)
+; CHECK-NOBMI-NEXT: pextrw $2, %xmm1, %ecx
+; CHECK-NOBMI-NEXT: movw %cx, 4(%rax)
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_7xi8:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movq %rdi, %rax
+; CHECK-BMI2-SSE2-NEXT: movl {{[0-9]+}}(%rsp), %edi
+; CHECK-BMI2-SSE2-NEXT: shll $8, %edi
+; CHECK-BMI2-SSE2-NEXT: movzbl %r9b, %r9d
+; CHECK-BMI2-SSE2-NEXT: orl %edi, %r9d
+; CHECK-BMI2-SSE2-NEXT: movzbl %sil, %esi
+; CHECK-BMI2-SSE2-NEXT: movzbl %dl, %edx
+; CHECK-BMI2-SSE2-NEXT: shll $8, %edx
+; CHECK-BMI2-SSE2-NEXT: orl %esi, %edx
+; CHECK-BMI2-SSE2-NEXT: movzbl %cl, %ecx
+; CHECK-BMI2-SSE2-NEXT: shll $16, %ecx
+; CHECK-BMI2-SSE2-NEXT: orl %edx, %ecx
+; CHECK-BMI2-SSE2-NEXT: shll $24, %r8d
+; CHECK-BMI2-SSE2-NEXT: orl %ecx, %r8d
+; CHECK-BMI2-SSE2-NEXT: movd %r8d, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pinsrw $2, %r9d, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pinsrw $3, {{[0-9]+}}(%rsp), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-BMI2-SSE2-NEXT: movd %xmm1, (%rax)
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-BMI2-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; CHECK-BMI2-SSE2-NEXT: movb %cl, 6(%rax)
+; CHECK-BMI2-SSE2-NEXT: pextrw $2, %xmm1, %ecx
+; CHECK-BMI2-SSE2-NEXT: movw %cx, 4(%rax)
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX12-LABEL: sign_7xi8:
+; CHECK-AVX12: # %bb.0:
+; CHECK-AVX12-NEXT: movq %rdi, %rax
+; CHECK-AVX12-NEXT: vmovd %esi, %xmm0
+; CHECK-AVX12-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
+; CHECK-AVX12-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
+; CHECK-AVX12-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0
+; CHECK-AVX12-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0
+; CHECK-AVX12-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-AVX12-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX12-NEXT: vpextrb $6, %xmm0, 6(%rdi)
+; CHECK-AVX12-NEXT: vpextrw $2, %xmm0, 4(%rdi)
+; CHECK-AVX12-NEXT: vmovd %xmm0, (%rdi)
+; CHECK-AVX12-NEXT: retq
+;
+; CHECK-AVX512-LABEL: sign_7xi8:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: movq %rdi, %rax
+; CHECK-AVX512-NEXT: vmovd %esi, %xmm0
+; CHECK-AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpextrb $6, %xmm0, 6(%rdi)
+; CHECK-AVX512-NEXT: vpextrw $2, %xmm0, 4(%rdi)
+; CHECK-AVX512-NEXT: vmovd %xmm0, (%rdi)
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <7 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %res = select <7 x i1> %c, <7 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <7 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ ret <7 x i8> %res
+}
+
+define <8 x i8> @sign_8xi8(<8 x i8> %a) {
+; CHECK-NOBMI-LABEL: sign_8xi8:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_8xi8:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX12-LABEL: sign_8xi8:
+; CHECK-AVX12: # %bb.0:
+; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX12-NEXT: retq
+;
+; CHECK-AVX512-LABEL: sign_8xi8:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %res = select <8 x i1> %c, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ ret <8 x i8> %res
+}
+
+define <16 x i8> @sign_16xi8(<16 x i8> %a) {
+; CHECK-NOBMI-LABEL: sign_16xi8:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_16xi8:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX12-LABEL: sign_16xi8:
+; CHECK-AVX12: # %bb.0:
+; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX12-NEXT: retq
+;
+; CHECK-AVX512-LABEL: sign_16xi8:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %res = select <16 x i1> %c, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ ret <16 x i8> %res
+}
+
+define <3 x i32> @sign_3xi32(<3 x i32> %a) {
+; CHECK-NOBMI-LABEL: sign_3xi32:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: psrad $31, %xmm0
+; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_3xi32:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0
+; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX1-LABEL: sign_3xi32:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: sign_3xi32:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: sign_3xi32:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
+; CHECK-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 {%k1} = [1,1,1,1]
+; CHECK-AVX512-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1>
+ %res = select <3 x i1> %c, <3 x i32> <i32 1, i32 1, i32 1>, <3 x i32> <i32 -1, i32 -1, i32 -1>
+ ret <3 x i32> %res
+}
+
+define <4 x i32> @sign_4xi32(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: sign_4xi32:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: psrad $31, %xmm0
+; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_4xi32:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0
+; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX1-LABEL: sign_4xi32:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: sign_4xi32:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: sign_4xi32:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: sign_4xi32_multi_use:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: subq $24, %rsp
+; CHECK-NOBMI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOBMI-NEXT: psrad $31, %xmm2
+; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NOBMI-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-NOBMI-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: callq use_4xi1 at PLT
+; CHECK-NOBMI-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NOBMI-NEXT: addq $24, %rsp
+; CHECK-NOBMI-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_4xi32_multi_use:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: subq $24, %rsp
+; CHECK-BMI2-SSE2-NEXT: .cfi_def_cfa_offset 32
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm2
+; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: callq use_4xi1 at PLT
+; CHECK-BMI2-SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-BMI2-SSE2-NEXT: addq $24, %rsp
+; CHECK-BMI2-SSE2-NEXT: .cfi_def_cfa_offset 8
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX1-LABEL: sign_4xi32_multi_use:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: subq $24, %rsp
+; CHECK-AVX1-NEXT: .cfi_def_cfa_offset 32
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; CHECK-AVX1-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: callq use_4xi1 at PLT
+; CHECK-AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-AVX1-NEXT: addq $24, %rsp
+; CHECK-AVX1-NEXT: .cfi_def_cfa_offset 8
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: sign_4xi32_multi_use:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: subq $24, %rsp
+; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1]
+; CHECK-AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2
+; CHECK-AVX2-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: callq use_4xi1 at PLT
+; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-AVX2-NEXT: addq $24, %rsp
+; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: sign_4xi32_multi_use:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: subq $24, %rsp
+; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm2
+; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2
+; CHECK-AVX512-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill
+; CHECK-AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: callq use_4xi1 at PLT
+; CHECK-AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-AVX512-NEXT: addq $24, %rsp
+; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+ call void @use_4xi1(<4 x i1> %c)
+ ret <4 x i32> %res
+}
+
+; Second icmp operand breaks sign pattern.
+define <4 x i32> @not_sign_4xi32(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: not_sign_4xi32:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX1-LABEL: not_sign_4xi32:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: not_sign_4xi32:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: not_sign_4xi32:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k1} = [1,1,1,1]
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <4 x i32> %a, <i32 1, i32 -1, i32 -1, i32 -1>
+ %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %res
+}
+
+; First select operand breaks sign pattern.
+define <4 x i32> @not_sign_4xi32_2(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: not_sign_4xi32_2:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: psrad $31, %xmm0
+; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32_2:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0
+; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX-LABEL: not_sign_4xi32_2:
+; CHECK-AVX: # %bb.0:
+; CHECK-AVX-NEXT: vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX-NEXT: retq
+ %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 -1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %res
+}
+
+; Second select operand breaks sign pattern.
+define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
+; CHECK-NOBMI-LABEL: not_sign_4xi32_3:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NOBMI-NEXT: psubd %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32_3:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-BMI2-SSE2-NEXT: psubd %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX12-LABEL: not_sign_4xi32_3:
+; CHECK-AVX12: # %bb.0:
+; CHECK-AVX12-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-AVX12-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; CHECK-AVX12-NEXT: retq
+;
+; CHECK-AVX512-LABEL: not_sign_4xi32_3:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
+; CHECK-AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4294967295,4294967295,4294967295,1]
+; CHECK-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k1} = [1,1,1,1]
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 1>
+ ret <4 x i32> %res
+}
+
+; i65 is not legal.
+define <4 x i65> @sign_4xi65(<4 x i65> %a) {
+; CHECK-NOBMI-LABEL: sign_4xi65:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movq %rdi, %rax
+; CHECK-NOBMI-NEXT: andl $1, %edx
+; CHECK-NOBMI-NEXT: negq %rdx
+; CHECK-NOBMI-NEXT: andl $1, %r8d
+; CHECK-NOBMI-NEXT: negq %r8
+; CHECK-NOBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NOBMI-NEXT: andl $1, %edi
+; CHECK-NOBMI-NEXT: negq %rdi
+; CHECK-NOBMI-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NOBMI-NEXT: andl $1, %ecx
+; CHECK-NOBMI-NEXT: negq %rcx
+; CHECK-NOBMI-NEXT: movq %rcx, %rsi
+; CHECK-NOBMI-NEXT: orq $1, %rsi
+; CHECK-NOBMI-NEXT: movq %rdi, %r9
+; CHECK-NOBMI-NEXT: orq $1, %r9
+; CHECK-NOBMI-NEXT: movq %r8, %r10
+; CHECK-NOBMI-NEXT: orq $1, %r10
+; CHECK-NOBMI-NEXT: movq %rdx, %xmm0
+; CHECK-NOBMI-NEXT: orq $1, %rdx
+; CHECK-NOBMI-NEXT: movq %rdx, (%rax)
+; CHECK-NOBMI-NEXT: movq %rdx, %xmm1
+; CHECK-NOBMI-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NOBMI-NEXT: movq %xmm0, %rdx
+; CHECK-NOBMI-NEXT: leaq (%r10,%r10), %r11
+; CHECK-NOBMI-NEXT: subq %rdx, %r11
+; CHECK-NOBMI-NEXT: movq %r11, 8(%rax)
+; CHECK-NOBMI-NEXT: movq %rdi, %xmm0
+; CHECK-NOBMI-NEXT: movq %r9, %xmm1
+; CHECK-NOBMI-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NOBMI-NEXT: movd %xmm0, %edx
+; CHECK-NOBMI-NEXT: andl $1, %edx
+; CHECK-NOBMI-NEXT: shrq $62, %rdi
+; CHECK-NOBMI-NEXT: leaq (%rdi,%rdx,4), %rdx
+; CHECK-NOBMI-NEXT: leaq (%rdx,%rsi,8), %rdx
+; CHECK-NOBMI-NEXT: movq %rdx, 24(%rax)
+; CHECK-NOBMI-NEXT: movq %r8, %xmm0
+; CHECK-NOBMI-NEXT: movq %r10, %xmm1
+; CHECK-NOBMI-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NOBMI-NEXT: movd %xmm0, %edx
+; CHECK-NOBMI-NEXT: andl $1, %edx
+; CHECK-NOBMI-NEXT: shrq $63, %r8
+; CHECK-NOBMI-NEXT: leaq (%r8,%rdx,2), %rdx
+; CHECK-NOBMI-NEXT: leaq (%rdx,%r9,4), %rdx
+; CHECK-NOBMI-NEXT: movq %rdx, 16(%rax)
+; CHECK-NOBMI-NEXT: movq %rcx, %xmm0
+; CHECK-NOBMI-NEXT: shrq $61, %rcx
+; CHECK-NOBMI-NEXT: movq %rsi, %xmm1
+; CHECK-NOBMI-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NOBMI-NEXT: movd %xmm0, %edx
+; CHECK-NOBMI-NEXT: leal (%rcx,%rdx,8), %ecx
+; CHECK-NOBMI-NEXT: andl $15, %ecx
+; CHECK-NOBMI-NEXT: movb %cl, 32(%rax)
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: sign_4xi65:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movq %rdi, %rax
+; CHECK-BMI2-SSE2-NEXT: andl $1, %edx
+; CHECK-BMI2-SSE2-NEXT: negq %rdx
+; CHECK-BMI2-SSE2-NEXT: andl $1, %r8d
+; CHECK-BMI2-SSE2-NEXT: negq %r8
+; CHECK-BMI2-SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-BMI2-SSE2-NEXT: andl $1, %edi
+; CHECK-BMI2-SSE2-NEXT: negq %rdi
+; CHECK-BMI2-SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-BMI2-SSE2-NEXT: andl $1, %ecx
+; CHECK-BMI2-SSE2-NEXT: negq %rcx
+; CHECK-BMI2-SSE2-NEXT: movq %rcx, %rsi
+; CHECK-BMI2-SSE2-NEXT: orq $1, %rsi
+; CHECK-BMI2-SSE2-NEXT: movq %rdi, %r9
+; CHECK-BMI2-SSE2-NEXT: orq $1, %r9
+; CHECK-BMI2-SSE2-NEXT: movq %r8, %r10
+; CHECK-BMI2-SSE2-NEXT: orq $1, %r10
+; CHECK-BMI2-SSE2-NEXT: movq %rdx, %xmm0
+; CHECK-BMI2-SSE2-NEXT: orq $1, %rdx
+; CHECK-BMI2-SSE2-NEXT: movq %rdx, (%rax)
+; CHECK-BMI2-SSE2-NEXT: movq %rdx, %xmm1
+; CHECK-BMI2-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-BMI2-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-BMI2-SSE2-NEXT: movq %xmm0, %rdx
+; CHECK-BMI2-SSE2-NEXT: leaq (%r10,%r10), %r11
+; CHECK-BMI2-SSE2-NEXT: subq %rdx, %r11
+; CHECK-BMI2-SSE2-NEXT: movq %r11, 8(%rax)
+; CHECK-BMI2-SSE2-NEXT: movq %rdi, %xmm0
+; CHECK-BMI2-SSE2-NEXT: movq %r9, %xmm1
+; CHECK-BMI2-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-BMI2-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-BMI2-SSE2-NEXT: movd %xmm0, %edx
+; CHECK-BMI2-SSE2-NEXT: andl $1, %edx
+; CHECK-BMI2-SSE2-NEXT: shrq $62, %rdi
+; CHECK-BMI2-SSE2-NEXT: leaq (%rdi,%rdx,4), %rdx
+; CHECK-BMI2-SSE2-NEXT: leaq (%rdx,%rsi,8), %rdx
+; CHECK-BMI2-SSE2-NEXT: movq %rdx, 24(%rax)
+; CHECK-BMI2-SSE2-NEXT: movq %r8, %xmm0
+; CHECK-BMI2-SSE2-NEXT: movq %r10, %xmm1
+; CHECK-BMI2-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-BMI2-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-BMI2-SSE2-NEXT: movd %xmm0, %edx
+; CHECK-BMI2-SSE2-NEXT: andl $1, %edx
+; CHECK-BMI2-SSE2-NEXT: shrq $63, %r8
+; CHECK-BMI2-SSE2-NEXT: leaq (%r8,%rdx,2), %rdx
+; CHECK-BMI2-SSE2-NEXT: leaq (%rdx,%r9,4), %rdx
+; CHECK-BMI2-SSE2-NEXT: movq %rdx, 16(%rax)
+; CHECK-BMI2-SSE2-NEXT: movq %rcx, %xmm0
+; CHECK-BMI2-SSE2-NEXT: shrq $61, %rcx
+; CHECK-BMI2-SSE2-NEXT: movq %rsi, %xmm1
+; CHECK-BMI2-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-BMI2-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-BMI2-SSE2-NEXT: movd %xmm0, %edx
+; CHECK-BMI2-SSE2-NEXT: leal (%rcx,%rdx,8), %ecx
+; CHECK-BMI2-SSE2-NEXT: andl $15, %ecx
+; CHECK-BMI2-SSE2-NEXT: movb %cl, 32(%rax)
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX12-LABEL: sign_4xi65:
+; CHECK-AVX12: # %bb.0:
+; CHECK-AVX12-NEXT: movq %rdi, %rax
+; CHECK-AVX12-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-AVX12-NEXT: andl $1, %edi
+; CHECK-AVX12-NEXT: movq %rdi, %rcx
+; CHECK-AVX12-NEXT: negq %rcx
+; CHECK-AVX12-NEXT: andl $1, %r8d
+; CHECK-AVX12-NEXT: movq %r8, %r9
+; CHECK-AVX12-NEXT: negq %r9
+; CHECK-AVX12-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; CHECK-AVX12-NEXT: andl $1, %esi
+; CHECK-AVX12-NEXT: movq %rsi, %r10
+; CHECK-AVX12-NEXT: negq %r10
+; CHECK-AVX12-NEXT: andl $1, %edx
+; CHECK-AVX12-NEXT: movq %rdx, %r11
+; CHECK-AVX12-NEXT: negq %r11
+; CHECK-AVX12-NEXT: orq $1, %r11
+; CHECK-AVX12-NEXT: movq %r11, (%rax)
+; CHECK-AVX12-NEXT: addl %r8d, %r8d
+; CHECK-AVX12-NEXT: subq %r8, %rdx
+; CHECK-AVX12-NEXT: orq $2, %rdx
+; CHECK-AVX12-NEXT: movq %rdx, 8(%rax)
+; CHECK-AVX12-NEXT: movq %r10, %rdx
+; CHECK-AVX12-NEXT: shrq $62, %rdx
+; CHECK-AVX12-NEXT: andl $1, %r10d
+; CHECK-AVX12-NEXT: leaq (%rdx,%r10,4), %rdx
+; CHECK-AVX12-NEXT: shll $3, %edi
+; CHECK-AVX12-NEXT: subq %rdi, %rdx
+; CHECK-AVX12-NEXT: orq $8, %rdx
+; CHECK-AVX12-NEXT: movq %rdx, 24(%rax)
+; CHECK-AVX12-NEXT: movq %r9, %rdx
+; CHECK-AVX12-NEXT: shrq $63, %rdx
+; CHECK-AVX12-NEXT: andl $1, %r9d
+; CHECK-AVX12-NEXT: leaq (%rdx,%r9,2), %rdx
+; CHECK-AVX12-NEXT: shll $2, %esi
+; CHECK-AVX12-NEXT: subq %rsi, %rdx
+; CHECK-AVX12-NEXT: orq $4, %rdx
+; CHECK-AVX12-NEXT: movq %rdx, 16(%rax)
+; CHECK-AVX12-NEXT: movq %rcx, %rdx
+; CHECK-AVX12-NEXT: shrq $61, %rdx
+; CHECK-AVX12-NEXT: leal (%rdx,%rcx,8), %ecx
+; CHECK-AVX12-NEXT: andl $15, %ecx
+; CHECK-AVX12-NEXT: movb %cl, 32(%rax)
+; CHECK-AVX12-NEXT: retq
+;
+; CHECK-AVX512-LABEL: sign_4xi65:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: pushq %rbx
+; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
+; CHECK-AVX512-NEXT: .cfi_offset %rbx, -16
+; CHECK-AVX512-NEXT: movq %rdi, %rax
+; CHECK-AVX512-NEXT: andl $1, %edx
+; CHECK-AVX512-NEXT: negq %rdx
+; CHECK-AVX512-NEXT: andl $1, %r8d
+; CHECK-AVX512-NEXT: negq %r8
+; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX512-NEXT: andl $1, %ecx
+; CHECK-AVX512-NEXT: negq %rcx
+; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; CHECK-AVX512-NEXT: andl $1, %esi
+; CHECK-AVX512-NEXT: negq %rsi
+; CHECK-AVX512-NEXT: movl $1, %r9d
+; CHECK-AVX512-NEXT: movq $-1, %r10
+; CHECK-AVX512-NEXT: movq $-1, %rdi
+; CHECK-AVX512-NEXT: cmovnsq %r9, %rdi
+; CHECK-AVX512-NEXT: testq %rcx, %rcx
+; CHECK-AVX512-NEXT: movq $-1, %r11
+; CHECK-AVX512-NEXT: cmovnsq %r9, %r11
+; CHECK-AVX512-NEXT: testq %r8, %r8
+; CHECK-AVX512-NEXT: movq $-1, %rbx
+; CHECK-AVX512-NEXT: cmovnsq %r9, %rbx
+; CHECK-AVX512-NEXT: testq %rdx, %rdx
+; CHECK-AVX512-NEXT: cmovnsq %r9, %r10
+; CHECK-AVX512-NEXT: shrq $63, %rsi
+; CHECK-AVX512-NEXT: shrq $63, %rcx
+; CHECK-AVX512-NEXT: shrq $63, %r8
+; CHECK-AVX512-NEXT: movq %r10, (%rax)
+; CHECK-AVX512-NEXT: shrdq $63, %rbx, %rdx
+; CHECK-AVX512-NEXT: movq %rdx, 8(%rax)
+; CHECK-AVX512-NEXT: shldq $1, %rbx, %r8
+; CHECK-AVX512-NEXT: leaq (%r8,%r11,4), %rdx
+; CHECK-AVX512-NEXT: movq %rdx, 16(%rax)
+; CHECK-AVX512-NEXT: shrq $62, %r11
+; CHECK-AVX512-NEXT: leaq (%r11,%rcx,4), %rcx
+; CHECK-AVX512-NEXT: leaq (%rcx,%rdi,8), %rcx
+; CHECK-AVX512-NEXT: movq %rcx, 24(%rax)
+; CHECK-AVX512-NEXT: shrq $61, %rdi
+; CHECK-AVX512-NEXT: leal (%rdi,%rsi,8), %ecx
+; CHECK-AVX512-NEXT: movb %cl, 32(%rax)
+; CHECK-AVX512-NEXT: popq %rbx
+; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
+; CHECK-AVX512-NEXT: retq
+ %c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1>
+ %res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65> <i65 -1, i65 -1, i65 -1, i65 -1>
+ ret <4 x i65> %res
+}
+
+define i32 @or_neg(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orl $1, %edi
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: retq
+ %3 = or i32 %x, 1
+ %4 = sub i32 0, %3
+ %5 = icmp sgt i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ugt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orl $1, %edi
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: retq
+ %3 = or i32 %x, 1
+ %4 = sub i32 0, %3
+ %5 = icmp ugt i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_no_smin(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: retq
+ %4 = sub i32 0, %x
+ %5 = icmp sgt i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_ult_no_zero(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult_no_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: retq
+ %4 = sub i32 0, %x
+ %5 = icmp ult i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin_but_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovgl %edi, %ecx
+; CHECK-NEXT: negl %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %ecx
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: retq
+ %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %4 = sub i32 0, %3
+ %5 = icmp sgt i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg_slt_zero_but_no_smin(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_slt_zero_but_no_smin:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpl $9, %edi
+; CHECK-NEXT: movl $9, %ecx
+; CHECK-NEXT: cmovbl %edi, %ecx
+; CHECK-NEXT: negl %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %ecx
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: retq
+ %3 = call i32 @llvm.umin.i32(i32 %x, i32 9)
+ %4 = sub i32 0, %3
+ %5 = icmp ugt i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orl $1, %edi
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setge %al
+; CHECK-NEXT: retq
+ %3 = or i32 %x, 1
+ %4 = sub i32 0, %3
+ %5 = icmp sge i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg3(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orl $1, %edi
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: retq
+ %3 = or i32 %x, 1
+ %4 = sub i32 0, %3
+ %5 = icmp slt i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg4(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orl $1, %edi
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setle %al
+; CHECK-NEXT: retq
+ %3 = or i32 %x, 1
+ %4 = sub i32 0, %3
+ %5 = icmp sle i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg_ult(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult:
+; CHECK: # %bb.0:
+; CHECK-NEXT: orl $1, %edi
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: retq
+ %3 = or i32 %x, 1
+ %4 = sub i32 0, %3
+ %5 = icmp ugt i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg_no_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setge %al
+; CHECK-NEXT: retq
+ %4 = sub i32 0, %x
+ %5 = icmp sge i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; Negative test
+
+define i32 @or_neg_ult_no_zero2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_ult_no_zero2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: retq
+ %4 = sub i32 0, %x
+ %5 = icmp ult i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg_no_smin_but_zero2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_no_smin_but_zero2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: cmovgl %edi, %ecx
+; CHECK-NEXT: negl %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %ecx
+; CHECK-NEXT: setle %al
+; CHECK-NEXT: retq
+ %3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %4 = sub i32 0, %3
+ %5 = icmp sle i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @or_neg_slt_zero_but_no_smin2(i32 %x, i32 %y) {
+; CHECK-LABEL: or_neg_slt_zero_but_no_smin2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpl $9, %edi
+; CHECK-NEXT: movl $9, %ecx
+; CHECK-NEXT: cmovbl %edi, %ecx
+; CHECK-NEXT: negl %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %ecx
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: retq
+ %3 = call i32 @llvm.umin.i32(i32 %x, i32 9)
+ %4 = sub i32 0, %3
+ %5 = icmp uge i32 %4, %y
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+declare void @use_4xi1(<4 x i1>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-BMI2: {{.*}}
+; CHECK-NOBMI-SSE2: {{.*}}
From adfea58ac082b34e6abee15e29a78ec0e1a7b25f Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Sun, 3 Aug 2025 13:01:08 -0400
Subject: [PATCH 2/2] [SelectionDAG] Move VSelect sign pattern check from
 AArch64 to general SelectionDAG
The check for this pattern already exists in DAGCombiner, but it deliberately bails out, presumably to leave the transform to AArch64's own combine. Since the pattern is already recognized generically, perform the transform in SelectionDAG directly and drop the duplicated AArch64 logic.
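
For illustration, the scalar form of the identity this combine exploits, as a minimal standalone C++ sketch (not part of the patch; the function names are invented for the example, and C++20 is assumed so that right-shifting a negative signed value is a well-defined arithmetic shift):

#include <cassert>
#include <cstdint>

// (a > -1 ? 1 : -1) is equivalent to (a >> 31) | 1 for int32_t:
// the arithmetic right shift by width-1 yields 0 for non-negative a
// and -1 (all ones) for negative a; OR-ing in 1 maps these to 1 and -1.
int32_t sign_via_select(int32_t a) { return a > -1 ? 1 : -1; }
int32_t sign_via_shift(int32_t a) { return (a >> 31) | 1; }

int main() {
  for (int32_t a : {INT32_MIN, -100, -1, 0, 1, 100, INT32_MAX})
    assert(sign_via_select(a) == sign_via_shift(a));
  return 0;
}

The vector combine below does the same thing lane-wise: (vselect (setgt x, splat(-1)), splat(1), splat(-1)) becomes (or (sra x, splat(N-1)), splat(1)), trading a compare-and-select for a shift and an OR.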
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 ++-
.../Target/AArch64/AArch64ISelLowering.cpp | 31 ---------
llvm/test/CodeGen/X86/cmp-select-sign.ll | 69 +++++++++----------
3 files changed, 40 insertions(+), 69 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 11e869aebe7da..37cc7b02ed9b0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13142,8 +13142,13 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
ISD::isConstantSplatVector(TVal.getNode(), TValAPInt) &&
TValAPInt.isOne() &&
ISD::isConstantSplatVectorAllOnes(Cond.getOperand(1).getNode()) &&
- ISD::isConstantSplatVectorAllOnes(FVal.getNode())) {
- return SDValue();
+ ISD::isConstantSplatVectorAllOnes(FVal.getNode()) &&
+ !TLI.shouldAvoidTransformToShift(VT, VT.getScalarSizeInBits() - 1)) {
+ SDValue LHS = Cond.getOperand(0);
+ EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue ShiftC = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, ShiftVT);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, ShiftC);
+ return DAG.getNode(ISD::OR, DL, VT, Shift, TVal);
}
// To use the condition operand as a bitwise mask, it must have elements that
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2b6ea86ee1af5..cb0964dcbf546 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25887,38 +25887,7 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
}
}
- // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
- // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
- // supported types.
SDValue SetCC = N->getOperand(0);
- if (SetCC.getOpcode() == ISD::SETCC &&
- SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
- SDValue CmpLHS = SetCC.getOperand(0);
- EVT VT = CmpLHS.getValueType();
- SDNode *CmpRHS = SetCC.getOperand(1).getNode();
- SDNode *SplatLHS = N->getOperand(1).getNode();
- SDNode *SplatRHS = N->getOperand(2).getNode();
- APInt SplatLHSVal;
- if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
- VT.isSimple() &&
- is_contained(ArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
- MVT::v2i32, MVT::v4i32, MVT::v2i64}),
- VT.getSimpleVT().SimpleTy) &&
- ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
- SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
- ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
- unsigned NumElts = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops(
- NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
- VT.getScalarType()));
- SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
-
- auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
- auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
- return Or;
- }
- }
-
EVT CmpVT = N0.getOperand(0).getValueType();
if (N0.getOpcode() != ISD::SETCC ||
CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
diff --git a/llvm/test/CodeGen/X86/cmp-select-sign.ll b/llvm/test/CodeGen/X86/cmp-select-sign.ll
index 18adc66f9a2ff..889d5c7d780bc 100644
--- a/llvm/test/CodeGen/X86/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/X86/cmp-select-sign.ll
@@ -331,10 +331,8 @@ define <3 x i32> @sign_3xi32(<3 x i32> %a) {
;
; CHECK-AVX512-LABEL: sign_3xi32:
; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
-; CHECK-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 {%k1} = [1,1,1,1]
-; CHECK-AVX512-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1>
%res = select <3 x i1> %c, <3 x i32> <i32 1, i32 1, i32 1>, <3 x i32> <i32 -1, i32 -1, i32 -1>
@@ -747,41 +745,40 @@ define <4 x i65> @sign_4xi65(<4 x i65> %a) {
; CHECK-AVX512-NEXT: movq %rdi, %rax
; CHECK-AVX512-NEXT: andl $1, %edx
; CHECK-AVX512-NEXT: negq %rdx
-; CHECK-AVX512-NEXT: andl $1, %r8d
-; CHECK-AVX512-NEXT: negq %r8
; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX512-NEXT: andl $1, %ecx
-; CHECK-AVX512-NEXT: negq %rcx
-; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi
-; CHECK-AVX512-NEXT: andl $1, %esi
+; CHECK-AVX512-NEXT: movq %rcx, %r9
+; CHECK-AVX512-NEXT: negq %r9
+; CHECK-AVX512-NEXT: andl $1, %r8d
+; CHECK-AVX512-NEXT: negq %r8
+; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
+; CHECK-AVX512-NEXT: andl $1, %r11d
+; CHECK-AVX512-NEXT: movq %r11, %rsi
; CHECK-AVX512-NEXT: negq %rsi
-; CHECK-AVX512-NEXT: movl $1, %r9d
-; CHECK-AVX512-NEXT: movq $-1, %r10
-; CHECK-AVX512-NEXT: movq $-1, %rdi
-; CHECK-AVX512-NEXT: cmovnsq %r9, %rdi
-; CHECK-AVX512-NEXT: testq %rcx, %rcx
-; CHECK-AVX512-NEXT: movq $-1, %r11
-; CHECK-AVX512-NEXT: cmovnsq %r9, %r11
-; CHECK-AVX512-NEXT: testq %r8, %r8
-; CHECK-AVX512-NEXT: movq $-1, %rbx
-; CHECK-AVX512-NEXT: cmovnsq %r9, %rbx
-; CHECK-AVX512-NEXT: testq %rdx, %rdx
-; CHECK-AVX512-NEXT: cmovnsq %r9, %r10
-; CHECK-AVX512-NEXT: shrq $63, %rsi
-; CHECK-AVX512-NEXT: shrq $63, %rcx
-; CHECK-AVX512-NEXT: shrq $63, %r8
-; CHECK-AVX512-NEXT: movq %r10, (%rax)
-; CHECK-AVX512-NEXT: shrdq $63, %rbx, %rdx
-; CHECK-AVX512-NEXT: movq %rdx, 8(%rax)
-; CHECK-AVX512-NEXT: shldq $1, %rbx, %r8
-; CHECK-AVX512-NEXT: leaq (%r8,%r11,4), %rdx
-; CHECK-AVX512-NEXT: movq %rdx, 16(%rax)
-; CHECK-AVX512-NEXT: shrq $62, %r11
-; CHECK-AVX512-NEXT: leaq (%r11,%rcx,4), %rcx
-; CHECK-AVX512-NEXT: leaq (%rcx,%rdi,8), %rcx
-; CHECK-AVX512-NEXT: movq %rcx, 24(%rax)
-; CHECK-AVX512-NEXT: shrq $61, %rdi
-; CHECK-AVX512-NEXT: leal (%rdi,%rsi,8), %ecx
+; CHECK-AVX512-NEXT: movq %rsi, %rdi
+; CHECK-AVX512-NEXT: shrq $63, %rdi
+; CHECK-AVX512-NEXT: movq %r8, %r10
+; CHECK-AVX512-NEXT: shrq $63, %r10
+; CHECK-AVX512-NEXT: movq %r9, %rbx
+; CHECK-AVX512-NEXT: shrq $63, %rbx
+; CHECK-AVX512-NEXT: shldq $1, %r8, %r10
+; CHECK-AVX512-NEXT: orq $1, %r8
+; CHECK-AVX512-NEXT: shldq $1, %rdx, %r8
+; CHECK-AVX512-NEXT: orq $1, %rdx
+; CHECK-AVX512-NEXT: movq %rdx, (%rax)
+; CHECK-AVX512-NEXT: movq %r8, 8(%rax)
+; CHECK-AVX512-NEXT: shrq $62, %r9
+; CHECK-AVX512-NEXT: leaq (%r9,%rbx,4), %rdx
+; CHECK-AVX512-NEXT: shll $3, %r11d
+; CHECK-AVX512-NEXT: subq %r11, %rdx
+; CHECK-AVX512-NEXT: orq $8, %rdx
+; CHECK-AVX512-NEXT: movq %rdx, 24(%rax)
+; CHECK-AVX512-NEXT: shll $2, %ecx
+; CHECK-AVX512-NEXT: subq %rcx, %r10
+; CHECK-AVX512-NEXT: orq $4, %r10
+; CHECK-AVX512-NEXT: movq %r10, 16(%rax)
+; CHECK-AVX512-NEXT: shrq $61, %rsi
+; CHECK-AVX512-NEXT: leal (%rsi,%rdi,8), %ecx
; CHECK-AVX512-NEXT: movb %cl, 32(%rax)
; CHECK-AVX512-NEXT: popq %rbx
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8