[llvm] [BOLT][tests] Fix jrcxz instruction test (PR #95861)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 16:04:24 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Maksim Panchenko (maksfb)

<details>
<summary>Changes</summary>

Rewrite the test case that checks that BOLT does not place a jrcxz instruction farther from its destination than a one-byte signed offset can reach.

---
Full diff: https://github.com/llvm/llvm-project/pull/95861.diff


1 Files Affected:

- (modified) bolt/test/X86/bug-reorder-bb-jrcxz.s (+21-628) 


``````````diff
diff --git a/bolt/test/X86/bug-reorder-bb-jrcxz.s b/bolt/test/X86/bug-reorder-bb-jrcxz.s
index d5ac3548909e3..8a11ac4da4d67 100644
--- a/bolt/test/X86/bug-reorder-bb-jrcxz.s
+++ b/bolt/test/X86/bug-reorder-bb-jrcxz.s
@@ -1,640 +1,33 @@
-## Test performs a BB reordering with unsupported
-## instruction jrcxz. Reordering works correctly with the
-## follow options: None, Normal or Reverse. Other strategies
-## are completed with Assertion `isIntN(Size * 8 + 1, Value).
-## The cause is the distance between BB where one contains
-## jrcxz instruction.
-## Example: OpenSSL
-## https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319
+## Check that BOLT handles code with jrcxz instruction that has a one-byte
+## signed offset restriction. If we try to separate jrcxz instruction from its
+## destination, e.g. by placing it in a different code fragment, then the link
+## step will fail.
 
 # REQUIRES: system-linux
 
-# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
-# RUN:   %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
 # RUN: link_fdata %s %t.o %t.fdata
-# RUN: %clang %cflags %t.o -falign-labels -march=native -o %t.exe -Wl,-q
+# RUN: llvm-strip --strip-unneeded %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
 
-# RUN:  llvm-bolt %t.exe -o %t.bolted --data %t.fdata \
-# RUN:    --reorder-blocks=ext-tsp --reorder-functions=hfsort \
-# RUN:    --split-functions --split-all-cold --split-eh --dyno-stats \
-# RUN:    --print-finalized 2>&1 | FileCheck %s
+## Disable relocation mode to leave main fragment in its original location.
 
-# CHECK-NOT: value of -2105 is too large for field of 1 byte.
+# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
+# RUN:   --split-functions --relocs=0
 
-  .text
-  .section .text.startup,"ax",@progbits
-  .p2align 5,,31
-  .globl main
-  .type main, @function
+	.text
+	.globl main
+	.type	main,@function
 main:
-  jmp bn_sqrx8x_internal
-
-.globl bn_sqrx8x_internal
-.hidden bn_sqrx8x_internal
-.type bn_sqrx8x_internal,@function
-.align 32
-bn_sqrx8x_internal:
-__bn_sqrx8x_internal:
-# FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56
-# FDATA: 1 bn_sqrx8x_internal 13 1  bn_sqrx8x_internal 40 0 60972
-# FDATA: 1 bn_sqrx8x_internal 5f 1  bn_sqrx8x_internal 2c 0 60972
-# FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972
-# FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972
-# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888
-# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984
-# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972
-# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012
-# FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012
-# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964
-# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008
-# FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008
-# FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908
-# FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908
-# FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020
-# FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020
-# FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336
-# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048
-# FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048
-# FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560
-# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080
-# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048
-# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032
-# FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032
-# FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048
-# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028
-# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020
-# FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020
+# FDATA: 0 [unknown] 0 1 main 0 0 1
+# FDATA: 1 main 0 1 main #.hot# 0 1
 .cfi_startproc
-  leaq 48+8(%rsp),%rdi
-  leaq (%rsi,%r9,1),%rbp
-  movq %r9,0+8(%rsp)
-  movq %rbp,8+8(%rsp)
-  jmp .Lsqr8x_zero_start
-
-.align 32
-.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
-.Lsqrx8x_zero:
-.byte 0x3e
-  movdqa %xmm0,0(%rdi)
-  movdqa %xmm0,16(%rdi)
-  movdqa %xmm0,32(%rdi)
-  movdqa %xmm0,48(%rdi)
-.Lsqr8x_zero_start:
-  movdqa %xmm0,64(%rdi)
-  movdqa %xmm0,80(%rdi)
-  movdqa %xmm0,96(%rdi)
-  movdqa %xmm0,112(%rdi)
-  leaq 128(%rdi),%rdi
-  subq $64,%r9
-  jnz .Lsqrx8x_zero
-
-  movq 0(%rsi),%rdx
-
-  xorq %r10,%r10
-  xorq %r11,%r11
-  xorq %r12,%r12
-  xorq %r13,%r13
-  xorq %r14,%r14
-  xorq %r15,%r15
-  leaq 48+8(%rsp),%rdi
-  xorq %rbp,%rbp
-  jmp .Lsqrx8x_outer_loop
-
-.align 32
-.Lsqrx8x_outer_loop:
-  mulxq 8(%rsi),%r8,%rax
-  adcxq %r9,%r8
-  adoxq %rax,%r10
-  mulxq 16(%rsi),%r9,%rax
-  adcxq %r10,%r9
-  adoxq %rax,%r11
-.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
-  adcxq %r11,%r10
-  adoxq %rax,%r12
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
-  adcxq %r12,%r11
-  adoxq %rax,%r13
-  mulxq 40(%rsi),%r12,%rax
-  adcxq %r13,%r12
-  adoxq %rax,%r14
-  mulxq 48(%rsi),%r13,%rax
-  adcxq %r14,%r13
-  adoxq %r15,%rax
-  mulxq 56(%rsi),%r14,%r15
-  movq 8(%rsi),%rdx
-  adcxq %rax,%r14
-  adoxq %rbp,%r15
-  adcq 64(%rdi),%r15
-  movq %r8,8(%rdi)
-  movq %r9,16(%rdi)
-  sbbq %rcx,%rcx
-  xorq %rbp,%rbp
-
-  mulxq 16(%rsi),%r8,%rbx
-  mulxq 24(%rsi),%r9,%rax
-  adcxq %r10,%r8
-  adoxq %rbx,%r9
-  mulxq 32(%rsi),%r10,%rbx
-  adcxq %r11,%r9
-  adoxq %rax,%r10
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
-  adcxq %r12,%r10
-  adoxq %rbx,%r11
-.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
-  adcxq %r13,%r11
-  adoxq %r14,%r12
-.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
-  movq 16(%rsi),%rdx
-  adcxq %rax,%r12
-  adoxq %rbx,%r13
-  adcxq %r15,%r13
-  adoxq %rbp,%r14
-  adcxq %rbp,%r14
-
-  movq %r8,24(%rdi)
-  movq %r9,32(%rdi)
-
-  mulxq 24(%rsi),%r8,%rbx
-  mulxq 32(%rsi),%r9,%rax
-  adcxq %r10,%r8
-  adoxq %rbx,%r9
-  mulxq 40(%rsi),%r10,%rbx
-  adcxq %r11,%r9
-  adoxq %rax,%r10
-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
-  adcxq %r12,%r10
-  adoxq %r13,%r11
-.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
-.byte 0x3e
-  movq 24(%rsi),%rdx
-  adcxq %rbx,%r11
-  adoxq %rax,%r12
-  adcxq %r14,%r12
-  movq %r8,40(%rdi)
-  movq %r9,48(%rdi)
-  mulxq 32(%rsi),%r8,%rax
-  adoxq %rbp,%r13
-  adcxq %rbp,%r13
-
-  mulxq 40(%rsi),%r9,%rbx
-  adcxq %r10,%r8
-  adoxq %rax,%r9
-  mulxq 48(%rsi),%r10,%rax
-  adcxq %r11,%r9
-  adoxq %r12,%r10
-  mulxq 56(%rsi),%r11,%r12
-  movq 32(%rsi),%rdx
-  movq 40(%rsi),%r14
-  adcxq %rbx,%r10
-  adoxq %rax,%r11
-  movq 48(%rsi),%r15
-  adcxq %r13,%r11
-  adoxq %rbp,%r12
-  adcxq %rbp,%r12
-
-  movq %r8,56(%rdi)
-  movq %r9,64(%rdi)
-
-  mulxq %r14,%r9,%rax
-  movq 56(%rsi),%r8
-  adcxq %r10,%r9
-  mulxq %r15,%r10,%rbx
-  adoxq %rax,%r10
-  adcxq %r11,%r10
-  mulxq %r8,%r11,%rax
-  movq %r14,%rdx
-  adoxq %rbx,%r11
-  adcxq %r12,%r11
-
-  adcxq %rbp,%rax
-
-  mulxq %r15,%r14,%rbx
-  mulxq %r8,%r12,%r13
-  movq %r15,%rdx
-  leaq 64(%rsi),%rsi
-  adcxq %r14,%r11
-  adoxq %rbx,%r12
-  adcxq %rax,%r12
-  adoxq %rbp,%r13
-
-.byte 0x67,0x67
-  mulxq %r8,%r8,%r14
-  adcxq %r8,%r13
-  adcxq %rbp,%r14
-
-  cmpq 8+8(%rsp),%rsi
-  je .Lsqrx8x_outer_break
-
-  negq %rcx
-  movq $-8,%rcx
-  movq %rbp,%r15
-  movq 64(%rdi),%r8
-  adcxq 72(%rdi),%r9
-  adcxq 80(%rdi),%r10
-  adcxq 88(%rdi),%r11
-  adcq 96(%rdi),%r12
-  adcq 104(%rdi),%r13
-  adcq 112(%rdi),%r14
-  adcq 120(%rdi),%r15
-  leaq (%rsi),%rbp
-  leaq 128(%rdi),%rdi
-  sbbq %rax,%rax
-
-  movq -64(%rsi),%rdx
-  movq %rax,16+8(%rsp)
-  movq %rdi,24+8(%rsp)
-
+  jrcxz .Lcold
+.hot:
+  ret
 
+.Lcold:
   xorl %eax,%eax
-  jmp .Lsqrx8x_loop
-
-.align 32
-.Lsqrx8x_loop:
-  movq %r8,%rbx
-  mulxq 0(%rbp),%rax,%r8
-  adcxq %rax,%rbx
-  adoxq %r9,%r8
-
-  mulxq 8(%rbp),%rax,%r9
-  adcxq %rax,%r8
-  adoxq %r10,%r9
-
-  mulxq 16(%rbp),%rax,%r10
-  adcxq %rax,%r9
-  adoxq %r11,%r10
-
-  mulxq 24(%rbp),%rax,%r11
-  adcxq %rax,%r10
-  adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
-  adcxq %rax,%r11
-  adoxq %r13,%r12
-
-  mulxq 40(%rbp),%rax,%r13
-  adcxq %rax,%r12
-  adoxq %r14,%r13
-
-  mulxq 48(%rbp),%rax,%r14
-  movq %rbx,(%rdi,%rcx,8)
-  movl $0,%ebx
-  adcxq %rax,%r13
-  adoxq %r15,%r14
-
-.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
-  movq 8(%rsi,%rcx,8),%rdx
-  adcxq %rax,%r14
-  adoxq %rbx,%r15
-  adcxq %rbx,%r15
-
-.byte 0x67
-  incq %rcx
-  jnz .Lsqrx8x_loop
-
-  leaq 64(%rbp),%rbp
-  movq $-8,%rcx
-  cmpq 8+8(%rsp),%rbp
-  je .Lsqrx8x_break
-
-  subq 16+8(%rsp),%rbx
-.byte 0x66
-  movq -64(%rsi),%rdx
-  adcxq 0(%rdi),%r8
-  adcxq 8(%rdi),%r9
-  adcq 16(%rdi),%r10
-  adcq 24(%rdi),%r11
-  adcq 32(%rdi),%r12
-  adcq 40(%rdi),%r13
-  adcq 48(%rdi),%r14
-  adcq 56(%rdi),%r15
-  leaq 64(%rdi),%rdi
-.byte 0x67
-  sbbq %rax,%rax
-  xorl %ebx,%ebx
-  movq %rax,16+8(%rsp)
-  jmp .Lsqrx8x_loop
-
-.align 32
-.Lsqrx8x_break:
-  xorq %rbp,%rbp
-  subq 16+8(%rsp),%rbx
-  adcxq %rbp,%r8
-  movq 24+8(%rsp),%rcx
-  adcxq %rbp,%r9
-  movq 0(%rsi),%rdx
-  adcq $0,%r10
-  movq %r8,0(%rdi)
-  adcq $0,%r11
-  adcq $0,%r12
-  adcq $0,%r13
-  adcq $0,%r14
-  adcq $0,%r15
-  cmpq %rcx,%rdi
-  je .Lsqrx8x_outer_loop
-
-  movq %r9,8(%rdi)
-  movq 8(%rcx),%r9
-  movq %r10,16(%rdi)
-  movq 16(%rcx),%r10
-  movq %r11,24(%rdi)
-  movq 24(%rcx),%r11
-  movq %r12,32(%rdi)
-  movq 32(%rcx),%r12
-  movq %r13,40(%rdi)
-  movq 40(%rcx),%r13
-  movq %r14,48(%rdi)
-  movq 48(%rcx),%r14
-  movq %r15,56(%rdi)
-  movq 56(%rcx),%r15
-  movq %rcx,%rdi
-  jmp .Lsqrx8x_outer_loop
-
-.align 32
-.Lsqrx8x_outer_break:
-  movq %r9,72(%rdi)
-.byte 102,72,15,126,217
-  movq %r10,80(%rdi)
-  movq %r11,88(%rdi)
-  movq %r12,96(%rdi)
-  movq %r13,104(%rdi)
-  movq %r14,112(%rdi)
-  leaq 48+8(%rsp),%rdi
-  movq (%rsi,%rcx,1),%rdx
-
-  movq 8(%rdi),%r11
-  xorq %r10,%r10
-  movq 0+8(%rsp),%r9
-  adoxq %r11,%r11
-  movq 16(%rdi),%r12
-  movq 24(%rdi),%r13
-
-.align 32
-.Lsqrx4x_shift_n_add:
-  mulxq %rdx,%rax,%rbx
-  adoxq %r12,%r12
-  adcxq %r10,%rax
-.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
-.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
-  adoxq %r13,%r13
-  adcxq %r11,%rbx
-  movq 40(%rdi),%r11
-  movq %rax,0(%rdi)
-  movq %rbx,8(%rdi)
-
-  mulxq %rdx,%rax,%rbx
-  adoxq %r10,%r10
-  adcxq %r12,%rax
-  movq 16(%rsi,%rcx,1),%rdx
-  movq 48(%rdi),%r12
-  adoxq %r11,%r11
-  adcxq %r13,%rbx
-  movq 56(%rdi),%r13
-  movq %rax,16(%rdi)
-  movq %rbx,24(%rdi)
-
-  mulxq %rdx,%rax,%rbx
-  adoxq %r12,%r12
-  adcxq %r10,%rax
-  movq 24(%rsi,%rcx,1),%rdx
-  leaq 32(%rcx),%rcx
-  movq 64(%rdi),%r10
-  adoxq %r13,%r13
-  adcxq %r11,%rbx
-  movq 72(%rdi),%r11
-  movq %rax,32(%rdi)
-  movq %rbx,40(%rdi)
-
-  mulxq %rdx,%rax,%rbx
-  adoxq %r10,%r10
-  adcxq %r12,%rax
-  jrcxz .Lsqrx4x_shift_n_add_break
-.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
-  adoxq %r11,%r11
-  adcxq %r13,%rbx
-  movq 80(%rdi),%r12
-  movq 88(%rdi),%r13
-  movq %rax,48(%rdi)
-  movq %rbx,56(%rdi)
-  leaq 64(%rdi),%rdi
-  nop
-  jmp .Lsqrx4x_shift_n_add
-
-.align 32
-.Lsqrx4x_shift_n_add_break:
-  adcxq %r13,%rbx
-  movq %rax,48(%rdi)
-  movq %rbx,56(%rdi)
-  leaq 64(%rdi),%rdi
-.byte 102,72,15,126,213
-__bn_sqrx8x_reduction:
-  xorl %eax,%eax
-  movq 32+8(%rsp),%rbx
-  movq 48+8(%rsp),%rdx
-  leaq -64(%rbp,%r9,1),%rcx
-
-  movq %rcx,0+8(%rsp)
-  movq %rdi,8+8(%rsp)
-
-  leaq 48+8(%rsp),%rdi
-  jmp .Lsqrx8x_reduction_loop
-
-.align 32
-.Lsqrx8x_reduction_loop:
-  movq 8(%rdi),%r9
-  movq 16(%rdi),%r10
-  movq 24(%rdi),%r11
-  movq 32(%rdi),%r12
-  movq %rdx,%r8
-  imulq %rbx,%rdx
-  movq 40(%rdi),%r13
-  movq 48(%rdi),%r14
-  movq 56(%rdi),%r15
-  movq %rax,24+8(%rsp)
-
-  leaq 64(%rdi),%rdi
-  xorq %rsi,%rsi
-  movq $-8,%rcx
-  jmp .Lsqrx8x_reduce
-
-.align 32
-.Lsqrx8x_reduce:
-  movq %r8,%rbx
-  mulxq 0(%rbp),%rax,%r8
-  adcxq %rbx,%rax
-  adoxq %r9,%r8
-
-  mulxq 8(%rbp),%rbx,%r9
-  adcxq %rbx,%r8
-  adoxq %r10,%r9
-
-  mulxq 16(%rbp),%rbx,%r10
-  adcxq %rbx,%r9
-  adoxq %r11,%r10
-
-  mulxq 24(%rbp),%rbx,%r11
-  adcxq %rbx,%r10
-  adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
-  movq %rdx,%rax
-  movq %r8,%rdx
-  adcxq %rbx,%r11
-  adoxq %r13,%r12
-
-  mulxq 32+8(%rsp),%rbx,%rdx
-  movq %rax,%rdx
-  movq %rax,64+48+8(%rsp,%rcx,8)
-
-  mulxq 40(%rbp),%rax,%r13
-  adcxq %rax,%r12
-  adoxq %r14,%r13
-
-  mulxq 48(%rbp),%rax,%r14
-  adcxq %rax,%r13
-  adoxq %r15,%r14
-
-  mulxq 56(%rbp),%rax,%r15
-  movq %rbx,%rdx
-  adcxq %rax,%r14
-  adoxq %rsi,%r15
-  adcxq %rsi,%r15
-
-.byte 0x67,0x67,0x67
-  incq %rcx
-  jnz .Lsqrx8x_reduce
-
-  movq %rsi,%rax
-  cmpq 0+8(%rsp),%rbp
-  jae .Lsqrx8x_no_tail
-
-  movq 48+8(%rsp),%rdx
-  addq 0(%rdi),%r8
-  leaq 64(%rbp),%rbp
-  movq $-8,%rcx
-  adcxq 8(%rdi),%r9
-  adcxq 16(%rdi),%r10
-  adcq 24(%rdi),%r11
-  adcq 32(%rdi),%r12
-  adcq 40(%rdi),%r13
-  adcq 48(%rdi),%r14
-  adcq 56(%rdi),%r15
-  leaq 64(%rdi),%rdi
-  sbbq %rax,%rax
-
-  xorq %rsi,%rsi
-  movq %rax,16+8(%rsp)
-  jmp .Lsqrx8x_tail
-
-.align 32
-.Lsqrx8x_tail:
-  movq %r8,%rbx
-  mulxq 0(%rbp),%rax,%r8
-  adcxq %rax,%rbx
-  adoxq %r9,%r8
-
-  mulxq 8(%rbp),%rax,%r9
-  adcxq %rax,%r8
-  adoxq %r10,%r9
-
-  mulxq 16(%rbp),%rax,%r10
-  adcxq %rax,%r9
-  adoxq %r11,%r10
-
-  mulxq 24(%rbp),%rax,%r11
-  adcxq %rax,%r10
-  adoxq %r12,%r11
-
-.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
-  adcxq %rax,%r11
-  adoxq %r13,%r12
-
-  mulxq 40(%rbp),%rax,%r13
-  adcxq %rax,%r12
-  adoxq %r14,%r13
-
-  mulxq 48(%rbp),%rax,%r14
-  adcxq %rax,%r13
-  adoxq %r15,%r14
-
-  mulxq 56(%rbp),%rax,%r15
-  movq 72+48+8(%rsp,%rcx,8),%rdx
-  adcxq %rax,%r14
-  adoxq %rsi,%r15
-  movq %rbx,(%rdi,%rcx,8)
-  movq %r8,%rbx
-  adcxq %rsi,%r15
-
-  incq %rcx
-  jnz .Lsqrx8x_tail
-
-  cmpq 0+8(%rsp),%rbp
-  jae .Lsqrx8x_tail_done
-
-  subq 16+8(%rsp),%rsi
-  movq 48+8(%rsp),%rdx
-  leaq 64(%rbp),%rbp
-  adcq 0(%rdi),%r8
-  adcq 8(%rdi),%r9
-  adcq 16(%rdi),%r10
-  adcq 24(%rdi),%r11
-  adcq 32(%rdi),%r12
-  adcq 40(%rdi),%r13
-  adcq 48(%rdi),%r14
-  adcq 56(%rdi),%r15
-  leaq 64(%rdi),%rdi
-  sbbq %rax,%rax
-  subq $8,%rcx
-
-  xorq %rsi,%rsi
-  movq %rax,16+8(%rsp)
-  jmp .Lsqrx8x_tail
-
-.align 32
-.Lsqrx8x_tail_done:
-  xorq %rax,%rax
-  addq 24+8(%rsp),%r8
-  adcq $0,%r9
-  adcq $0,%r10
-  adcq $0,%r11
-  adcq $0,%r12
-  adcq $0,%r13
-  adcq $0,%r14
-  adcq $0,%r15
-  adcq $0,%rax
-
-  subq 16+8(%rsp),%rsi
-.Lsqrx8x_no_tail:
-  adcq 0(%rdi),%r8
-.byte 102,72,15,126,217
-  adcq 8(%rdi),%r9
-  movq 56(%rbp),%rsi
-.byte 102,72,15,126,213
-  adcq 16(%rdi),%r10
-  adcq 24(%rdi),%r11
-  adcq 32(%rdi),%r12
-  adcq 40(%rdi),%r13
-  adcq 48(%rdi),%r14
-  adcq 56(%rdi),%r15
-  adcq $0,%rax
-
-  movq 32+8(%rsp),%rbx
-  movq 64(%rdi,%rcx,1),%rdx
-
-  movq %r8,0(%rdi)
-  leaq 64(%rdi),%r8
-  movq %r9,8(%rdi)
-  movq %r10,16(%rdi)
-  movq %r11,24(%rdi)
-  movq %r12,32(%rdi)
-  movq %r13,40(%rdi)
-  movq %r14,48(%rdi)
-  movq %r15,56(%rdi)
-
-  leaq 64(%rdi,%rcx,1),%rdi
-  cmpq 8+8(%rsp),%r8
-  jb .Lsqrx8x_reduction_loop
-  .byte 0xf3,0xc3
+  ret
 .cfi_endproc
-.size  bn_sqrx8x_internal,.-bn_sqrx8x_internal
+.size  main,.-main

``````````

</details>


https://github.com/llvm/llvm-project/pull/95861


More information about the llvm-commits mailing list