[llvm] [X86] Add test coverage showing BT/BTC/BTR/BTS and 'init' patterns for big (illegal) integer types (PR #165361)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 28 02:32:00 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Changes:
This is the beginning of an investigation into how we can better handle bit twiddling of _BitInt types.
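For reference, here is a minimal C sketch (not part of the patch; the function names and the 256-bit width are hypothetical) of the kind of _BitInt bit twiddling these tests are meant to model:

```c
/* Minimal sketch, not taken from the patch: hypothetical helpers showing the
 * source-level bit twiddling the IR tests below correspond to. The 256-bit
 * width is illustrative of an "illegal" (wider-than-register) integer type. */
_Bool test_bit(const unsigned _BitInt(256) *word, unsigned pos) {
  return (*word >> (pos % 256)) & 1;                       /* bt  */
}

void complement_bit(unsigned _BitInt(256) *word, unsigned pos) {
  *word ^= (unsigned _BitInt(256))1 << (pos % 256);        /* btc */
}

void reset_bit(unsigned _BitInt(256) *word, unsigned pos) {
  *word &= ~((unsigned _BitInt(256))1 << (pos % 256));     /* btr */
}

void set_bit(unsigned _BitInt(256) *word, unsigned pos) {
  *word |= (unsigned _BitInt(256))1 << (pos % 256);        /* bts */
}

void init_bit(unsigned _BitInt(256) *word, unsigned pos, _Bool value) {
  unsigned _BitInt(256) bit = (unsigned _BitInt(256))1 << (pos % 256);
  *word = (*word & ~bit) | ((unsigned _BitInt(256))value << (pos % 256)); /* init */
}
```

The test file writes the equivalent load/and/xor/or/store IR patterns directly, starting from legal i32/i64 reference cases and extending to wider types.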
---
Patch is 302.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165361.diff
1 file affected:
- (added) llvm/test/CodeGen/X86/bittest-big-integer.ll (+7027)
``````````diff
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
new file mode 100644
index 0000000000000..19d751d176b6a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -0,0 +1,7027 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
+
+; bt/btc/btr/bts patterns + 'init' to set single bit value in large integers
+
+;
+; i32 bt/btc/btr/bts + init (reference)
+;
+
+define i1 @test_eq_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_eq_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: retl
+;
+; X64-LABEL: test_eq_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: btl %esi, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: retq
+ %rem = and i32 %position, 31
+ %bit = shl nuw i32 1, %rem
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %cmp = icmp eq i32 %test, 0
+ ret i1 %cmp
+}
+
+define i1 @complement_ne_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: btcl %eax, %esi
+; X86-NEXT: btl %eax, %edx
+; X86-NEXT: setb %al
+; X86-NEXT: movl %esi, (%ecx)
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: complement_ne_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: btcl %esi, %ecx
+; X64-NEXT: btl %esi, %eax
+; X64-NEXT: setb %al
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+ %ofs = and i32 %position, 31
+ %bit = shl nuw i32 1, %ofs
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %res = xor i32 %ld, %bit
+ %cmp = icmp ne i32 %test, 0
+ store i32 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @reset_eq_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: reset_eq_i32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: btrl %eax, %esi
+; X86-NEXT: btl %eax, %edx
+; X86-NEXT: setae %al
+; X86-NEXT: movl %esi, (%ecx)
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: reset_eq_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: btrl %esi, %ecx
+; X64-NEXT: btl %esi, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+ %ofs = and i32 %position, 31
+ %bit = shl nuw i32 1, %ofs
+ %mask = xor i32 %bit, -1
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %res = and i32 %ld, %mask
+ %cmp = icmp eq i32 %test, 0
+ store i32 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @set_ne_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: set_ne_i32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: btsl %eax, %esi
+; X86-NEXT: btl %eax, %edx
+; X86-NEXT: setb %al
+; X86-NEXT: movl %esi, (%ecx)
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: set_ne_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: btsl %esi, %ecx
+; X64-NEXT: btl %esi, %eax
+; X64-NEXT: setb %al
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+ %ofs = and i32 %position, 31
+ %bit = shl nuw i32 1, %ofs
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %res = or i32 %ld, %bit
+ %cmp = icmp ne i32 %test, 0
+ store i32 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind {
+; X86-LABEL: init_eq_i32:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movl (%edx), %esi
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: btrl %ecx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: btl %ecx, %esi
+; X86-NEXT: setae %al
+; X86-NEXT: movl %edi, (%edx)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; SSE-LABEL: init_eq_i32:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: shll %cl, %edx
+; SSE-NEXT: movl (%rdi), %eax
+; SSE-NEXT: movl %eax, %esi
+; SSE-NEXT: btrl %ecx, %esi
+; SSE-NEXT: orl %edx, %esi
+; SSE-NEXT: btl %ecx, %eax
+; SSE-NEXT: setae %al
+; SSE-NEXT: movl %esi, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: init_eq_i32:
+; AVX: # %bb.0:
+; AVX-NEXT: shlxl %esi, %edx, %eax
+; AVX-NEXT: movl (%rdi), %ecx
+; AVX-NEXT: movl %ecx, %edx
+; AVX-NEXT: btrl %esi, %edx
+; AVX-NEXT: orl %eax, %edx
+; AVX-NEXT: btl %esi, %ecx
+; AVX-NEXT: setae %al
+; AVX-NEXT: movl %edx, (%rdi)
+; AVX-NEXT: retq
+ %ofs = and i32 %position, 31
+ %bit = shl nuw i32 1, %ofs
+ %mask = xor i32 %bit, -1
+ %val0 = zext i1 %value to i32
+ %val = shl nuw i32 %val0, %ofs
+ %ld = load i32, ptr %word
+ %test = and i32 %ld, %bit
+ %res0 = and i32 %ld, %mask
+ %res = or i32 %res0, %val
+ %cmp = icmp eq i32 %test, 0
+ store i32 %res, ptr %word
+ ret i1 %cmp
+}
+
+;
+; i64 bt/btc/btr/bts + init
+;
+
+define i1 @test_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_ne_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB5_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: .LBB5_2:
+; X86-NEXT: andl 4(%eax), %esi
+; X86-NEXT: andl (%eax), %edx
+; X86-NEXT: orl %esi, %edx
+; X86-NEXT: setne %al
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: test_ne_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: btq %rsi, %rax
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %cmp = icmp ne i64 %test, 0
+ ret i1 %cmp
+}
+
+define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB6_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB6_2:
+; X86-NEXT: movl (%edx), %ecx
+; X86-NEXT: movl 4(%edx), %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: andl %esi, %ebx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: andl %eax, %ebp
+; X86-NEXT: xorl %esi, %edi
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: orl %ebx, %ebp
+; X86-NEXT: setne %al
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %edi, 4(%edx)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: complement_ne_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: btcq %rsi, %rcx
+; X64-NEXT: btq %rsi, %rax
+; X64-NEXT: setb %al
+; X64-NEXT: movq %rcx, (%rdi)
+; X64-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %res = xor i64 %ld, %bit
+ %cmp = icmp ne i64 %test, 0
+ store i64 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: reset_eq_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB7_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: .LBB7_2:
+; X86-NEXT: movl (%edx), %eax
+; X86-NEXT: movl 4(%edx), %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: andl %edi, %ebx
+; X86-NEXT: notl %edi
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: andl %esi, %ebp
+; X86-NEXT: notl %esi
+; X86-NEXT: andl %ecx, %edi
+; X86-NEXT: andl %eax, %esi
+; X86-NEXT: orl %ebx, %ebp
+; X86-NEXT: sete %al
+; X86-NEXT: movl %esi, (%edx)
+; X86-NEXT: movl %edi, 4(%edx)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: reset_eq_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: btrq %rsi, %rcx
+; X64-NEXT: btq %rsi, %rax
+; X64-NEXT: setae %al
+; X64-NEXT: movq %rcx, (%rdi)
+; X64-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %mask = xor i64 %bit, -1
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %res = and i64 %ld, %mask
+ %cmp = icmp eq i64 %test, 0
+ store i64 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @set_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: set_ne_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: shldl %cl, %eax, %esi
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB8_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB8_2:
+; X86-NEXT: movl (%edx), %ecx
+; X86-NEXT: movl 4(%edx), %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: andl %esi, %ebx
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: andl %eax, %ebp
+; X86-NEXT: orl %esi, %edi
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: orl %ebx, %ebp
+; X86-NEXT: setne %al
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %edi, 4(%edx)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: set_ne_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: btsq %rsi, %rcx
+; X64-NEXT: btq %rsi, %rax
+; X64-NEXT: setb %al
+; X64-NEXT: movq %rcx, (%rdi)
+; X64-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %res = or i64 %ld, %bit
+ %cmp = icmp ne i64 %test, 0
+ store i64 %res, ptr %word
+ ret i1 %cmp
+}
+
+define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {
+; X86-LABEL: init_eq_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: testb $32, %cl
+; X86-NEXT: je .LBB9_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: .LBB9_2:
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: notl %ebx
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: notl %ebp
+; X86-NEXT: je .LBB9_4
+; X86-NEXT: # %bb.3:
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: .LBB9_4:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl 4(%ecx), %ecx
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: andl %ecx, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl (%edi), %ecx
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: andl %ecx, %ebp
+; X86-NEXT: orl %esi, %ebp
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %ebp, (%edi)
+; X86-NEXT: movl %ebx, 4(%edi)
+; X86-NEXT: sete %al
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: init_eq_i64:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl %edx, %eax
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: movq (%rdi), %rdx
+; SSE-NEXT: movq %rdx, %rsi
+; SSE-NEXT: btrq %rcx, %rsi
+; SSE-NEXT: orq %rax, %rsi
+; SSE-NEXT: btq %rcx, %rdx
+; SSE-NEXT: setae %al
+; SSE-NEXT: movq %rsi, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: init_eq_i64:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX-NEXT: movl %edx, %eax
+; AVX-NEXT: shlxq %rsi, %rax, %rax
+; AVX-NEXT: movq (%rdi), %rcx
+; AVX-NEXT: movq %rcx, %rdx
+; AVX-NEXT: btrq %rsi, %rdx
+; AVX-NEXT: orq %rax, %rdx
+; AVX-NEXT: btq %rsi, %rcx
+; AVX-NEXT: setae %al
+; AVX-NEXT: movq %rdx, (%rdi)
+; AVX-NEXT: retq
+ %rem = and i32 %position, 63
+ %ofs = zext nneg i32 %rem to i64
+ %bit = shl nuw i64 1, %ofs
+ %mask = xor i64 %bit, -1
+ %val0 = zext i1 %value to i64
+ %val = shl nuw i64 %val0, %ofs
+ %ld = load i64, ptr %word
+ %test = and i64 %ld, %bit
+ %res0 = and i64 %ld, %mask
+ %res = or i64 %res0, %val
+ %cmp = icmp eq i64 %test, 0
+ store i64 %res, ptr %word
+ ret i1 %cmp
+}
+
+;
+; i128
+;
+
+define i1 @test_ne_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_ne_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $48, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, (%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %esi
+; X86-NEXT: movl 24(%esp,%esi), %edi
+; X86-NEXT: movl 28(%esp,%esi), %eax
+; X86-NEXT: shldl %cl, %edi, %eax
+; X86-NEXT: movl 16(%esp,%esi), %edx
+; X86-NEXT: movl 20(%esp,%esi), %esi
+; X86-NEXT: shldl %cl, %esi, %edi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: andl 8(%ebx), %edi
+; X86-NEXT: andl (%ebx), %edx
+; X86-NEXT: orl %edi, %edx
+; X86-NEXT: andl 12(%ebx), %eax
+; X86-NEXT: andl 4(%ebx), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: test_ne_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %eax
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: cmovneq %rsi, %rax
+; SSE-NEXT: andq 8(%rdi), %rdx
+; SSE-NEXT: andq (%rdi), %rax
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: test_ne_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: movl $1, %edx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shldq %cl, %rdx, %rsi
+; AVX2-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %rdx, %rsi
+; AVX2-NEXT: cmovneq %rax, %rdx
+; AVX2-NEXT: andq 8(%rdi), %rsi
+; AVX2-NEXT: andq (%rdi), %rdx
+; AVX2-NEXT: orq %rsi, %rdx
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_ne_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: movl $1, %eax
+; AVX512-NEXT: xorl %edx, %edx
+; AVX512-NEXT: shldq %cl, %rax, %rdx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shlxq %rcx, %rax, %rax
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %rax, %rdx
+; AVX512-NEXT: cmovneq %rsi, %rax
+; AVX512-NEXT: andq 8(%rdi), %rdx
+; AVX512-NEXT: andq (%rdi), %rax
+; AVX512-NEXT: orq %rdx, %rax
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: retq
+ %rem = and i32 %position, 127
+ %ofs = zext nneg i32 %rem to i128
+ %bit = shl nuw i128 1, %ofs
+ %ld = load i128, ptr %word
+ %test = and i128 %ld, %bit
+ %cmp = icmp ne i128 %test, 0
+ ret i1 %cmp
+}
+
+define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $80, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl 56(%esp,%eax), %esi
+; X86-NEXT: movl 60(%esp,%eax), %edx
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%esp,%eax), %edi
+; X86-NEXT: movl 52(%esp,%eax), %ebx
+; X86-NEXT: shldl %cl, %ebx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl 8(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: movl (%ecx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: andl %edi, %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl 12(%esi), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e...
[truncated]
``````````
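Since the CHECK lines are autogenerated, they can be refreshed after a codegen change with the usual update script (standard workflow, not stated in the patch; assumes a built llc is on PATH or is supplied via the script's --llc-binary option):

```
python llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/bittest-big-integer.ll
```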
https://github.com/llvm/llvm-project/pull/165361
More information about the llvm-commits mailing list