[llvm] [X86] Add test coverage showing BT/BTC/BTR/BTS and 'init' patterns for big (illegal) integer types (PR #165361)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 28 02:32:00 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes:

This is the beginning of an investigation into how we can better handle bit twiddling of _BitInt types.
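
For context, here is a minimal C sketch (illustrative only, not part of the patch) of the kind of source-level _BitInt bit twiddling that lowers to the bt/btc/btr/bts and 'init' IR patterns exercised by the new test; the function names are hypothetical:

```c
// Hypothetical source-level equivalents of the "set_ne" and "init_eq" test
// patterns, assuming a C23 compiler (or Clang's _BitInt extension).
#include <stdbool.h>

typedef unsigned _BitInt(128) u128;

// "bts + bt" style: set one bit and report whether it was already set.
static bool set_ne_u128(u128 *word, unsigned position) {
  u128 bit = (u128)1 << (position & 127);
  bool was_set = (*word & bit) != 0;
  *word |= bit;
  return was_set;
}

// "init" style: overwrite one bit with an arbitrary boolean value and
// report whether it was previously clear.
static bool init_eq_u128(u128 *word, unsigned position, bool value) {
  u128 bit = (u128)1 << (position & 127);
  bool was_clear = (*word & bit) == 0;
  *word = (*word & ~bit) | ((u128)value << (position & 127));
  return was_clear;
}
```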

---

Patch is 302.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165361.diff


1 File Affected:

- (added) llvm/test/CodeGen/X86/bittest-big-integer.ll (+7027) 


``````````diff
diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
new file mode 100644
index 0000000000000..19d751d176b6a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -0,0 +1,7027 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686--                   | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64    | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
+
+; bt/btc/btr/bts patterns + 'init' to set single bit value in large integers
+
+;
+; i32 bt/btc/btr/bts + init (reference)
+;
+
+define i1 @test_eq_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_eq_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    btl %ecx, %eax
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_eq_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    btl %esi, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+  %rem = and i32 %position, 31
+  %bit = shl nuw i32 1, %rem
+  %ld = load i32, ptr %word
+  %test = and i32 %ld, %bit
+  %cmp = icmp eq i32 %test, 0
+  ret i1 %cmp
+}
+
+define i1 @complement_ne_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    btcl %eax, %esi
+; X86-NEXT:    btl %eax, %edx
+; X86-NEXT:    setb %al
+; X86-NEXT:    movl %esi, (%ecx)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: complement_ne_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    btcl %esi, %ecx
+; X64-NEXT:    btl %esi, %eax
+; X64-NEXT:    setb %al
+; X64-NEXT:    movl %ecx, (%rdi)
+; X64-NEXT:    retq
+  %ofs = and i32 %position, 31
+  %bit = shl nuw i32 1, %ofs
+  %ld = load i32, ptr %word
+  %test = and i32 %ld, %bit
+  %res = xor i32 %ld, %bit
+  %cmp = icmp ne i32 %test, 0
+  store i32 %res, ptr %word
+  ret i1 %cmp
+}
+
+define i1 @reset_eq_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: reset_eq_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    btrl %eax, %esi
+; X86-NEXT:    btl %eax, %edx
+; X86-NEXT:    setae %al
+; X86-NEXT:    movl %esi, (%ecx)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: reset_eq_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    btrl %esi, %ecx
+; X64-NEXT:    btl %esi, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    movl %ecx, (%rdi)
+; X64-NEXT:    retq
+  %ofs = and i32 %position, 31
+  %bit = shl nuw i32 1, %ofs
+  %mask = xor i32 %bit, -1
+  %ld = load i32, ptr %word
+  %test = and i32 %ld, %bit
+  %res = and i32 %ld, %mask
+  %cmp = icmp eq i32 %test, 0
+  store i32 %res, ptr %word
+  ret i1 %cmp
+}
+
+define i1 @set_ne_i32(ptr %word, i32 %position) nounwind {
+; X86-LABEL: set_ne_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    btsl %eax, %esi
+; X86-NEXT:    btl %eax, %edx
+; X86-NEXT:    setb %al
+; X86-NEXT:    movl %esi, (%ecx)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: set_ne_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    btsl %esi, %ecx
+; X64-NEXT:    btl %esi, %eax
+; X64-NEXT:    setb %al
+; X64-NEXT:    movl %ecx, (%rdi)
+; X64-NEXT:    retq
+  %ofs = and i32 %position, 31
+  %bit = shl nuw i32 1, %ofs
+  %ld = load i32, ptr %word
+  %test = and i32 %ld, %bit
+  %res = or i32 %ld, %bit
+  %cmp = icmp ne i32 %test, 0
+  store i32 %res, ptr %word
+  ret i1 %cmp
+}
+
+define i1 @init_eq_i32(ptr %word, i32 %position, i1 zeroext %value) nounwind {
+; X86-LABEL: init_eq_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl (%edx), %esi
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    btrl %ecx, %edi
+; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    btl %ecx, %esi
+; X86-NEXT:    setae %al
+; X86-NEXT:    movl %edi, (%edx)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    retl
+;
+; SSE-LABEL: init_eq_i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %esi, %ecx
+; SSE-NEXT:    shll %cl, %edx
+; SSE-NEXT:    movl (%rdi), %eax
+; SSE-NEXT:    movl %eax, %esi
+; SSE-NEXT:    btrl %ecx, %esi
+; SSE-NEXT:    orl %edx, %esi
+; SSE-NEXT:    btl %ecx, %eax
+; SSE-NEXT:    setae %al
+; SSE-NEXT:    movl %esi, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: init_eq_i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    shlxl %esi, %edx, %eax
+; AVX-NEXT:    movl (%rdi), %ecx
+; AVX-NEXT:    movl %ecx, %edx
+; AVX-NEXT:    btrl %esi, %edx
+; AVX-NEXT:    orl %eax, %edx
+; AVX-NEXT:    btl %esi, %ecx
+; AVX-NEXT:    setae %al
+; AVX-NEXT:    movl %edx, (%rdi)
+; AVX-NEXT:    retq
+  %ofs = and i32 %position, 31
+  %bit = shl nuw i32 1, %ofs
+  %mask = xor i32 %bit, -1
+  %val0 = zext i1 %value to i32
+  %val = shl nuw i32 %val0, %ofs
+  %ld = load i32, ptr %word
+  %test = and i32 %ld, %bit
+  %res0 = and i32 %ld, %mask
+  %res = or i32 %res0, %val
+  %cmp = icmp eq i32 %test, 0
+  store i32 %res, ptr %word
+  ret i1 %cmp
+}
+
+;
+; i64 bt/btc/btr/bts + init
+;
+
+define i1 @test_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_ne_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %edx
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB5_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:  .LBB5_2:
+; X86-NEXT:    andl 4(%eax), %esi
+; X86-NEXT:    andl (%eax), %edx
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    setne %al
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_ne_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    btq %rsi, %rax
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
+  %rem = and i32 %position, 63
+  %ofs = zext nneg i32 %rem to i64
+  %bit = shl nuw i64 1, %ofs
+  %ld = load i64, ptr %word
+  %test = and i64 %ld, %bit
+  %cmp = icmp ne i64 %test, 0
+  ret i1 %cmp
+}
+
+define i1 @complement_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB6_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB6_2:
+; X86-NEXT:    movl (%edx), %ecx
+; X86-NEXT:    movl 4(%edx), %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    andl %esi, %ebx
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    andl %eax, %ebp
+; X86-NEXT:    xorl %esi, %edi
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    setne %al
+; X86-NEXT:    movl %ecx, (%edx)
+; X86-NEXT:    movl %edi, 4(%edx)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: complement_ne_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    btcq %rsi, %rcx
+; X64-NEXT:    btq %rsi, %rax
+; X64-NEXT:    setb %al
+; X64-NEXT:    movq %rcx, (%rdi)
+; X64-NEXT:    retq
+  %rem = and i32 %position, 63
+  %ofs = zext nneg i32 %rem to i64
+  %bit = shl nuw i64 1, %ofs
+  %ld = load i64, ptr %word
+  %test = and i64 %ld, %bit
+  %res = xor i64 %ld, %bit
+  %cmp = icmp ne i64 %test, 0
+  store i64 %res, ptr %word
+  ret i1 %cmp
+}
+
+define i1 @reset_eq_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: reset_eq_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %esi
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB7_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB7_2:
+; X86-NEXT:    movl (%edx), %eax
+; X86-NEXT:    movl 4(%edx), %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    andl %edi, %ebx
+; X86-NEXT:    notl %edi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    andl %esi, %ebp
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andl %ecx, %edi
+; X86-NEXT:    andl %eax, %esi
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    sete %al
+; X86-NEXT:    movl %esi, (%edx)
+; X86-NEXT:    movl %edi, 4(%edx)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: reset_eq_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    btrq %rsi, %rcx
+; X64-NEXT:    btq %rsi, %rax
+; X64-NEXT:    setae %al
+; X64-NEXT:    movq %rcx, (%rdi)
+; X64-NEXT:    retq
+  %rem = and i32 %position, 63
+  %ofs = zext nneg i32 %rem to i64
+  %bit = shl nuw i64 1, %ofs
+  %mask = xor i64 %bit, -1
+  %ld = load i64, ptr %word
+  %test = and i64 %ld, %bit
+  %res = and i64 %ld, %mask
+  %cmp = icmp eq i64 %test, 0
+  store i64 %res, ptr %word
+  ret i1 %cmp
+}
+
+define i1 @set_ne_i64(ptr %word, i32 %position) nounwind {
+; X86-LABEL: set_ne_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB8_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB8_2:
+; X86-NEXT:    movl (%edx), %ecx
+; X86-NEXT:    movl 4(%edx), %edi
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    andl %esi, %ebx
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    andl %eax, %ebp
+; X86-NEXT:    orl %esi, %edi
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    setne %al
+; X86-NEXT:    movl %ecx, (%edx)
+; X86-NEXT:    movl %edi, 4(%edx)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: set_ne_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    btsq %rsi, %rcx
+; X64-NEXT:    btq %rsi, %rax
+; X64-NEXT:    setb %al
+; X64-NEXT:    movq %rcx, (%rdi)
+; X64-NEXT:    retq
+  %rem = and i32 %position, 63
+  %ofs = zext nneg i32 %rem to i64
+  %bit = shl nuw i64 1, %ofs
+  %ld = load i64, ptr %word
+  %test = and i64 %ld, %bit
+  %res = or i64 %ld, %bit
+  %cmp = icmp ne i64 %test, 0
+  store i64 %res, ptr %word
+  ret i1 %cmp
+}
+
+define i1 @init_eq_i64(ptr %word, i32 %position, i1 zeroext %value) nounwind {
+; X86-LABEL: init_eq_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    je .LBB9_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movl $0, %eax
+; X86-NEXT:  .LBB9_2:
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    notl %ebx
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    notl %ebp
+; X86-NEXT:    je .LBB9_4
+; X86-NEXT:  # %bb.3:
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:  .LBB9_4:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl 4(%ecx), %ecx
+; X86-NEXT:    andl %ecx, %edx
+; X86-NEXT:    andl %ecx, %ebx
+; X86-NEXT:    orl %edi, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl (%edi), %ecx
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    andl %ecx, %ebp
+; X86-NEXT:    orl %esi, %ebp
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    movl %ebp, (%edi)
+; X86-NEXT:    movl %ebx, 4(%edi)
+; X86-NEXT:    sete %al
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; SSE-LABEL: init_eq_i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %esi, %ecx
+; SSE-NEXT:    movl %edx, %eax
+; SSE-NEXT:    shlq %cl, %rax
+; SSE-NEXT:    movq (%rdi), %rdx
+; SSE-NEXT:    movq %rdx, %rsi
+; SSE-NEXT:    btrq %rcx, %rsi
+; SSE-NEXT:    orq %rax, %rsi
+; SSE-NEXT:    btq %rcx, %rdx
+; SSE-NEXT:    setae %al
+; SSE-NEXT:    movq %rsi, (%rdi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: init_eq_i64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX-NEXT:    movl %edx, %eax
+; AVX-NEXT:    shlxq %rsi, %rax, %rax
+; AVX-NEXT:    movq (%rdi), %rcx
+; AVX-NEXT:    movq %rcx, %rdx
+; AVX-NEXT:    btrq %rsi, %rdx
+; AVX-NEXT:    orq %rax, %rdx
+; AVX-NEXT:    btq %rsi, %rcx
+; AVX-NEXT:    setae %al
+; AVX-NEXT:    movq %rdx, (%rdi)
+; AVX-NEXT:    retq
+  %rem = and i32 %position, 63
+  %ofs = zext nneg i32 %rem to i64
+  %bit = shl nuw i64 1, %ofs
+  %mask = xor i64 %bit, -1
+  %val0 = zext i1 %value to i64
+  %val = shl nuw i64 %val0, %ofs
+  %ld = load i64, ptr %word
+  %test = and i64 %ld, %bit
+  %res0 = and i64 %ld, %mask
+  %res = or i64 %res0, %val
+  %cmp = icmp eq i64 %test, 0
+  store i64 %res, ptr %word
+  ret i1 %cmp
+}
+
+;
+; i128
+;
+
+define i1 @test_ne_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: test_ne_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $48, %esp
+; X86-NEXT:    movzbl 12(%ebp), %ecx
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, (%esp)
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    andb $12, %al
+; X86-NEXT:    negb %al
+; X86-NEXT:    movsbl %al, %esi
+; X86-NEXT:    movl 24(%esp,%esi), %edi
+; X86-NEXT:    movl 28(%esp,%esi), %eax
+; X86-NEXT:    shldl %cl, %edi, %eax
+; X86-NEXT:    movl 16(%esp,%esi), %edx
+; X86-NEXT:    movl 20(%esp,%esi), %esi
+; X86-NEXT:    shldl %cl, %esi, %edi
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    movl 8(%ebp), %ebx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    andl 8(%ebx), %edi
+; X86-NEXT:    andl (%ebx), %edx
+; X86-NEXT:    orl %edi, %edx
+; X86-NEXT:    andl 12(%ebx), %eax
+; X86-NEXT:    andl 4(%ebx), %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    setne %al
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; SSE-LABEL: test_ne_i128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %esi, %ecx
+; SSE-NEXT:    movl $1, %eax
+; SSE-NEXT:    xorl %edx, %edx
+; SSE-NEXT:    shldq %cl, %rax, %rdx
+; SSE-NEXT:    xorl %esi, %esi
+; SSE-NEXT:    shlq %cl, %rax
+; SSE-NEXT:    testb $64, %cl
+; SSE-NEXT:    cmovneq %rax, %rdx
+; SSE-NEXT:    cmovneq %rsi, %rax
+; SSE-NEXT:    andq 8(%rdi), %rdx
+; SSE-NEXT:    andq (%rdi), %rax
+; SSE-NEXT:    orq %rdx, %rax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: test_ne_i128:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    movl %esi, %ecx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    movl $1, %edx
+; AVX2-NEXT:    xorl %esi, %esi
+; AVX2-NEXT:    shldq %cl, %rdx, %rsi
+; AVX2-NEXT:    shlxq %rcx, %rdx, %rdx
+; AVX2-NEXT:    testb $64, %cl
+; AVX2-NEXT:    cmovneq %rdx, %rsi
+; AVX2-NEXT:    cmovneq %rax, %rdx
+; AVX2-NEXT:    andq 8(%rdi), %rsi
+; AVX2-NEXT:    andq (%rdi), %rdx
+; AVX2-NEXT:    orq %rsi, %rdx
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: test_ne_i128:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movl %esi, %ecx
+; AVX512-NEXT:    movl $1, %eax
+; AVX512-NEXT:    xorl %edx, %edx
+; AVX512-NEXT:    shldq %cl, %rax, %rdx
+; AVX512-NEXT:    xorl %esi, %esi
+; AVX512-NEXT:    shlxq %rcx, %rax, %rax
+; AVX512-NEXT:    testb $64, %cl
+; AVX512-NEXT:    cmovneq %rax, %rdx
+; AVX512-NEXT:    cmovneq %rsi, %rax
+; AVX512-NEXT:    andq 8(%rdi), %rdx
+; AVX512-NEXT:    andq (%rdi), %rax
+; AVX512-NEXT:    orq %rdx, %rax
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    retq
+  %rem = and i32 %position, 127
+  %ofs = zext nneg i32 %rem to i128
+  %bit = shl nuw i128 1, %ofs
+  %ld = load i128, ptr %word
+  %test = and i128 %ld, %bit
+  %cmp = icmp ne i128 %test, 0
+  ret i1 %cmp
+}
+
+define i1 @complement_ne_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $80, %esp
+; X86-NEXT:    movzbl 12(%ebp), %ecx
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    andb $12, %al
+; X86-NEXT:    negb %al
+; X86-NEXT:    movsbl %al, %eax
+; X86-NEXT:    movl 56(%esp,%eax), %esi
+; X86-NEXT:    movl 60(%esp,%eax), %edx
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 48(%esp,%eax), %edi
+; X86-NEXT:    movl 52(%esp,%eax), %ebx
+; X86-NEXT:    shldl %cl, %ebx, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl %cl, %edi, %ebx
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl 8(%eax), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    andl %esi, %eax
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    andl %edi, %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl 12(%esi), %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e...
[truncated]

``````````



https://github.com/llvm/llvm-project/pull/165361


More information about the llvm-commits mailing list