[llvm] [WIP][SelectionDAG] Add support for the 3-way comparison intrinsics [US]CMP (PR #91871)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 06:13:20 PDT 2024
================
@@ -0,0 +1,498 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; 3-way unsigned comparison of two i32 values producing an i8 result.
+; The checks expect a branchless sequence: seta materialises the "greater"
+; bit in %cl and cmovae picks between it and the constant 255.
+define i8 @ucmp(i32 %x, i32 %y) {
+; CHECK-LABEL: ucmp:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: seta %cl
+; CHECK-NEXT: movl $255, %eax
+; CHECK-NEXT: cmovael %ecx, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+; 3-way signed comparison of two i32 values producing an i8 result.
+; This is the signed counterpart of @ucmp, so it must call @llvm.scmp and
+; the checks must use the signed condition codes (setg / cmovge).
+; NOTE(review): the CHECK lines were adjusted by hand when the intrinsic was
+; corrected; rerun utils/update_llc_test_checks.py to confirm them.
+define i8 @scmp(i32 %x, i32 %y) {
+; CHECK-LABEL: scmp:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setg %cl
+; CHECK-NEXT: movl $255, %eax
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %1
+}
+
+; 3-way unsigned comparison of two i32 values with a non-byte-sized i4
+; result. The expected instruction sequence is the same as for the i8
+; result in @ucmp; the value is returned in %al.
+define i4 @ucmp_narrow_result(i32 %x, i32 %y) {
+; CHECK-LABEL: ucmp_narrow_result:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: seta %cl
+; CHECK-NEXT: movl $255, %eax
+; CHECK-NEXT: cmovael %ecx, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %1 = call i4 @llvm.ucmp(i32 %x, i32 %y)
+ ret i4 %1
+}
+
+; 3-way signed comparison of two non-byte-sized i5 operands producing an i8
+; result. Because the comparison is signed, the i5 operands have to be
+; sign-extended (not masked) before the 8-bit compare, and the signed
+; condition codes (setg / cmovge) must be used.
+; NOTE(review): the CHECK lines were written by hand when the intrinsic was
+; corrected from @llvm.ucmp to @llvm.scmp; rerun
+; utils/update_llc_test_checks.py to confirm the exact sequence.
+define i8 @scmp_narrow_op(i5 %x, i5 %y) {
+; CHECK-LABEL: scmp_narrow_op:
+; CHECK: # %bb.0:
+; CHECK-NEXT: shlb $3, %sil
+; CHECK-NEXT: sarb $3, %sil
+; CHECK-NEXT: shlb $3, %dil
+; CHECK-NEXT: sarb $3, %dil
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpb %sil, %dil
+; CHECK-NEXT: setg %cl
+; CHECK-NEXT: movl $255, %eax
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %1 = call i8 @llvm.scmp(i5 %x, i5 %y)
+ ret i8 %1
+}
+
+; 3-way unsigned comparison of two i32 values with an i128 result, which is
+; returned in the %rax:%rdx pair.
+; NOTE(review): the checks zero %rdx unconditionally, so when %rax is -1 the
+; upper 64 bits of the result are 0 rather than all-ones - confirm that the
+; "less than" result is being sign-extended correctly to i128.
+define i128 @ucmp_wide_result(i32 %x, i32 %y) {
+; CHECK-LABEL: ucmp_wide_result:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: seta %cl
+; CHECK-NEXT: movq $-1, %rax
+; CHECK-NEXT: cmovaeq %rcx, %rax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: retq
+ %1 = call i128 @llvm.ucmp(i32 %x, i32 %y)
+ ret i128 %1
+}
+
+; 3-way signed comparison of two i128 operands producing an i8 result.
+; Each operand occupies two 64-bit registers, so the checks expect two
+; cmpq/sbbq pairs (one per operand order): the first feeds setl, the second
+; feeds the cmovge that selects between that bit and the constant 255.
+define i8 @scmp_wide_op(i128 %x, i128 %y) {
+; CHECK-LABEL: scmp_wide_op:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpq %rdi, %rdx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: sbbq %rsi, %rax
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: movzbl %al, %r8d
+; CHECK-NEXT: cmpq %rdx, %rdi
+; CHECK-NEXT: sbbq %rcx, %rsi
+; CHECK-NEXT: movl $255, %eax
+; CHECK-NEXT: cmovgel %r8d, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %1 = call i8 @llvm.scmp(i128 %x, i128 %y)
+ ret i8 %1
+}
+
+; 3-way unsigned comparison where neither the operand type (i7) nor the
+; result type (i41) is a power-of-two width. The operands are masked to
+; 7 bits (andb $127) before the 8-bit compare; the result is produced in
+; the full 64-bit %rax.
+define i41 @ucmp_uncommon_types(i7 %x, i7 %y) {
+; CHECK-LABEL: ucmp_uncommon_types:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $127, %sil
+; CHECK-NEXT: andb $127, %dil
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpb %sil, %dil
+; CHECK-NEXT: seta %cl
+; CHECK-NEXT: movq $-1, %rax
+; CHECK-NEXT: cmovaeq %rcx, %rax
+; CHECK-NEXT: retq
+ %1 = call i41 @llvm.ucmp(i7 %x, i7 %y)
+ ret i41 %1
+}
+
+; 3-way signed comparison with i99 operands and an i125 result. The high
+; 64-bit half of each operand is sign-extended from bit 98 with a
+; shlq/sarq $29 pair before the two-register cmpq/sbbq comparison.
+; NOTE(review): %rdx (the upper half of the result) is zeroed
+; unconditionally, so a -1 in %rax is not extended into the upper bits -
+; confirm this is the intended value for the "less than" case.
+define i125 @scmp_uncommon_types(i99 %x, i99 %y) {
+; CHECK-LABEL: scmp_uncommon_types:
+; CHECK: # %bb.0:
+; CHECK-NEXT: shlq $29, %rsi
+; CHECK-NEXT: sarq $29, %rsi
+; CHECK-NEXT: shlq $29, %rcx
+; CHECK-NEXT: sarq $29, %rcx
+; CHECK-NEXT: cmpq %rdi, %rdx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: sbbq %rsi, %rax
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: movzbl %al, %r8d
+; CHECK-NEXT: cmpq %rdx, %rdi
+; CHECK-NEXT: sbbq %rcx, %rsi
+; CHECK-NEXT: movq $-1, %rax
+; CHECK-NEXT: cmovgeq %r8, %rax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: retq
+ %1 = call i125 @llvm.scmp(i99 %x, i99 %y)
+ ret i125 %1
+}
+
+; Element-wise 3-way unsigned comparison of two <4 x i32> vectors with a
+; <4 x i32> result. Both operands are XORed with 2147483648 (the sign bit)
+; so that the signed pcmpgtd can be used for the unsigned comparison. The
+; "y > x" mask (all-ones) is ORed with the "x > y" mask shifted down to 1.
+define <4 x i32> @ucmp_normal_vectors(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: ucmp_normal_vectors:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: pxor %xmm2, %xmm0
+; CHECK-NEXT: pxor %xmm2, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <4 x i32> @llvm.ucmp(<4 x i32> %x, <4 x i32> %y)
+ ret <4 x i32> %1
+}
+
+; Element-wise 3-way signed comparison of two <4 x i32> vectors with a
+; narrower <4 x i8> result. The comparison is done on 32-bit lanes with
+; pcmpgtd; the result is then masked with a constant-pool value and
+; narrowed to bytes by two packuswb steps.
+define <4 x i8> @scmp_narrow_result(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: scmp_narrow_result:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: packuswb %xmm1, %xmm0
+; CHECK-NEXT: packuswb %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %y)
+ ret <4 x i8> %1
+}
+
+; Element-wise 3-way unsigned comparison of two <4 x i8> vectors with a
+; wider <4 x i32> result. Each operand is zero-extended to 32-bit lanes by
+; interleaving with the zeroed %xmm2 (punpcklbw then punpcklwd), after
+; which the lanes are compared with pcmpgtd.
+define <4 x i32> @ucmp_narrow_op(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: ucmp_narrow_op:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <4 x i32> @llvm.ucmp(<4 x i8> %x, <4 x i8> %y)
+ ret <4 x i32> %1
+}
+
+; Element-wise 3-way signed comparison of two <16 x i8> vectors with a
+; <16 x i32> result, which spans four XMM registers (%xmm0-%xmm3).
+; Each group of four bytes is moved into the top byte of a 32-bit lane by
+; the unpack instructions and sign-extended with psrad $24; the lanes are
+; then compared with pcmpgtd in both directions and combined with
+; psrld $31 / por, once per result register.
+define <16 x i32> @scmp_wide_res(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: scmp_wide_res:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movdqa %xmm1, %xmm4
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-NEXT: psrad $24, %xmm0
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; CHECK-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3]
+; CHECK-NEXT: psrad $24, %xmm5
+; CHECK-NEXT: movdqa %xmm5, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm5, %xmm0
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: por %xmm6, %xmm0
+; CHECK-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; CHECK-NEXT: psrad $24, %xmm1
+; CHECK-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; CHECK-NEXT: psrad $24, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm1
+; CHECK-NEXT: psrld $31, %xmm1
+; CHECK-NEXT: por %xmm5, %xmm1
+; CHECK-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; CHECK-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; CHECK-NEXT: psrad $24, %xmm2
+; CHECK-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; CHECK-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; CHECK-NEXT: psrad $24, %xmm5
+; CHECK-NEXT: movdqa %xmm5, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm5, %xmm2
+; CHECK-NEXT: psrld $31, %xmm2
+; CHECK-NEXT: por %xmm6, %xmm2
+; CHECK-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
+; CHECK-NEXT: psrad $24, %xmm3
+; CHECK-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4,4,5,5,6,6,7,7]
+; CHECK-NEXT: psrad $24, %xmm4
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm4, %xmm3
+; CHECK-NEXT: psrld $31, %xmm3
+; CHECK-NEXT: por %xmm5, %xmm3
+; CHECK-NEXT: retq
+ %1 = call <16 x i32> @llvm.scmp(<16 x i8> %x, <16 x i8> %y)
+ ret <16 x i32> %1
+}
+
+; Element-wise 3-way unsigned comparison of two <16 x i32> vectors with a
+; <16 x i8> result. The operands arrive as four XMM registers each
+; (%xmm0-%xmm3 and %xmm4-%xmm7). Every pair is XORed with 2147483648 (the
+; sign bit) so the signed pcmpgtd can be used, compared in both directions,
+; and masked to the low byte of each 32-bit lane. The four partial results
+; are then narrowed into a single register with packuswb.
+define <16 x i8> @ucmp_wide_op(<16 x i32> %x, <16 x i32> %y) {
+; CHECK-LABEL: ucmp_wide_op:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: pxor %xmm8, %xmm3
+; CHECK-NEXT: pxor %xmm8, %xmm7
+; CHECK-NEXT: movdqa %xmm7, %xmm9
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm9
+; CHECK-NEXT: pcmpgtd %xmm7, %xmm3
+; CHECK-NEXT: psrld $31, %xmm3
+; CHECK-NEXT: por %xmm9, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm7 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; CHECK-NEXT: pand %xmm7, %xmm3
+; CHECK-NEXT: pxor %xmm8, %xmm2
+; CHECK-NEXT: pxor %xmm8, %xmm6
+; CHECK-NEXT: movdqa %xmm6, %xmm9
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm9
+; CHECK-NEXT: pcmpgtd %xmm6, %xmm2
+; CHECK-NEXT: psrld $31, %xmm2
+; CHECK-NEXT: por %xmm9, %xmm2
+; CHECK-NEXT: pand %xmm7, %xmm2
+; CHECK-NEXT: packuswb %xmm3, %xmm2
+; CHECK-NEXT: pxor %xmm8, %xmm1
+; CHECK-NEXT: pxor %xmm8, %xmm5
+; CHECK-NEXT: movdqa %xmm5, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm5, %xmm1
+; CHECK-NEXT: psrld $31, %xmm1
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: pand %xmm7, %xmm1
+; CHECK-NEXT: pxor %xmm8, %xmm0
+; CHECK-NEXT: pxor %xmm8, %xmm4
+; CHECK-NEXT: movdqa %xmm4, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm4, %xmm0
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: por %xmm3, %xmm0
+; CHECK-NEXT: pand %xmm7, %xmm0
+; CHECK-NEXT: packuswb %xmm1, %xmm0
+; CHECK-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %1 = call <16 x i8> @llvm.ucmp(<16 x i32> %x, <16 x i32> %y)
+ ret <16 x i8> %1
+}
+
+define <7 x i25> @scmp_uncommon_vectors(<7 x i109> %x, <7 x i109> %y) {
----------------
nikic wrote:
```suggestion
define <7 x i25> @scmp_uncommon_vectors(<7 x i109> %x, <7 x i109> %y) nounwind {
```
To suppress CFI.
https://github.com/llvm/llvm-project/pull/91871
More information about the llvm-commits
mailing list