[llvm] [DAG] Reducing instructions by better legalization handling of AVGFLOORU for illegal data types (PR #101223)
Julius Alexandre via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 12:14:35 PDT 2024
https://github.com/medievalghoul created https://github.com/llvm/llvm-project/pull/101223
Previous reverted merge: https://github.com/llvm/llvm-project/pull/99913
Previous error generated by LLVM Bot:
<details>
<summary>Error generated by avgflooru-i128.ll</summary>
```
BUILD FAILED: failed test (failure)
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/AArch64/avgflooru-i128.ll' FAILED ********************
Exit Code: 1
Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-rel-x86-64-b1/build/bin/llc -mtriple=aarch64 < /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/avgflooru-i128.ll | /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/avgflooru-i128.ll
+ /b/ml-opt-rel-x86-64-b1/build/bin/llc -mtriple=aarch64
+ /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/avgflooru-i128.ll
/b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/avgflooru-i128.ll:111:15: error: CHECK-NEXT: expected string not found in input
; CHECK-NEXT: extr x11, x12, x11, #1
^
<stdin>:97:22: note: scanning from here
extr x1, x10, x9, #1
^
<stdin>:99:2: note: possible intended match here
extr x2, x12, x11, #1
^
Input file: <stdin>
Check file: /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/CodeGen/AArch64/avgflooru-i128.ll
-dump-input=help explains the following input dump.
Input was:
<<<<<<
.
.
.
92: adcs x9, x1, x5
93: cset w10, hs
94: adds x11, x2, x6
95: extr x0, x9, x8, #1
96: adcs x12, x3, x7
97: extr x1, x10, x9, #1
next:111'0 X error: no match found
98: cset w8, hs
next:111'0 ~~~~~~~~~~~~~
99: extr x2, x12, x11, #1
next:111'0 ~~~~~~~~~~~~~~~~~~~~~~~
next:111'1 ? possible intended match
100: extr x3, x8, x12, #1
next:111'0 ~~~~~~~~~~~~~~~~~~~~~~
101: ret
next:111'0 ~~~~~
102: .Lfunc_end4:
next:111'0 ~~~~~~~~~~~~~
103: .size avgflooru_i128_vec, .Lfunc_end4-avgflooru_i128_vec
next:111'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
104: .cfi_endproc
next:111'0 ~~~~~~~~~~~~~~
...
```
</details>
There doesn't seem to be an obvious fix for this error message, and I wasn't able to reproduce the failure: all of my attempts to run
`ninja check` were successful. This may be an issue with the LLVM Bot. I removed the `AArch64/avgflooru-i128.ll`
test in order to prevent this issue from happening.
### To reiterate:
https://rust.godbolt.org/z/T7eKP3Tvo
x86: https://alive2.llvm.org/ce/z/ze88Hw
cc: @RKSimon @topperc
>From 2667ca01e9a6ecb6bfb0cd7371d0e166d78bbbad Mon Sep 17 00:00:00 2001
From: medievalghoul <61852278+medievalghoul at users.noreply.github.com>
Date: Tue, 30 Jul 2024 14:31:07 -0400
Subject: [PATCH 1/5] test without opt
---
llvm/test/CodeGen/X86/avgflooru-i128.ll | 170 ++++++++++++++++++++++++
1 file changed, 170 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/avgflooru-i128.ll
diff --git a/llvm/test/CodeGen/X86/avgflooru-i128.ll b/llvm/test/CodeGen/X86/avgflooru-i128.ll
new file mode 100644
index 0000000000000..a4c10f93a3193
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avgflooru-i128.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
+
+define i128 @avgflooru_i128(i128 %x, i128 %y) {
+; CHECK-LABEL: avgflooru_i128:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: xorq %rdi, %rax
+; CHECK-NEXT: movq %rcx, %r8
+; CHECK-NEXT: xorq %rsi, %r8
+; CHECK-NEXT: shrdq $1, %r8, %rax
+; CHECK-NEXT: andq %rsi, %rcx
+; CHECK-NEXT: shrq %r8
+; CHECK-NEXT: andq %rdi, %rdx
+; CHECK-NEXT: addq %rdx, %rax
+; CHECK-NEXT: adcq %rcx, %r8
+; CHECK-NEXT: movq %r8, %rdx
+; CHECK-NEXT: retq
+start:
+ %xor = xor i128 %y, %x
+ %lshr = lshr i128 %xor, 1
+ %and = and i128 %y, %x
+ %add = add i128 %lshr, %and
+ ret i128 %add
+}
+
+declare void @use(i8)
+
+define i128 @avgflooru_i128_multi_use(i128 %x, i128 %y) nounwind {
+; CHECK-LABEL: avgflooru_i128_multi_use:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: movq %rcx, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movq %rsi, %r15
+; CHECK-NEXT: movq %rdi, %r12
+; CHECK-NEXT: movq %rdx, %r13
+; CHECK-NEXT: xorq %rdi, %r13
+; CHECK-NEXT: movq %rcx, %rbp
+; CHECK-NEXT: xorq %rsi, %rbp
+; CHECK-NEXT: movq %r13, %rdi
+; CHECK-NEXT: movq %rbp, %rsi
+; CHECK-NEXT: callq use at PLT
+; CHECK-NEXT: shrdq $1, %rbp, %r13
+; CHECK-NEXT: shrq %rbp
+; CHECK-NEXT: movq %r13, %rdi
+; CHECK-NEXT: movq %rbp, %rsi
+; CHECK-NEXT: callq use at PLT
+; CHECK-NEXT: andq %r15, %rbx
+; CHECK-NEXT: andq %r12, %r14
+; CHECK-NEXT: addq %r13, %r14
+; CHECK-NEXT: adcq %rbp, %rbx
+; CHECK-NEXT: movq %r14, %rax
+; CHECK-NEXT: movq %rbx, %rdx
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+start:
+ %xor = xor i128 %y, %x
+ call void @use(i128 %xor)
+ %lshr = lshr i128 %xor, 1
+ call void @use(i128 %lshr)
+ %and = and i128 %y, %x
+ %add = add i128 %lshr, %and
+ ret i128 %add
+}
+
+; This test case shouldn't combine because it's not
+; an avgflooru operation
+
+define i128 @avgflooru_i128_negative(i128 %x, i128 %y) {
+; CHECK-LABEL: avgflooru_i128_negative:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: andq %rsi, %rcx
+; CHECK-NEXT: notq %rsi
+; CHECK-NEXT: andq %rdi, %rdx
+; CHECK-NEXT: notq %rax
+; CHECK-NEXT: addq %rdx, %rax
+; CHECK-NEXT: adcq %rcx, %rsi
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: retq
+start:
+ %xor = xor i128 %x, -1
+ %and = and i128 %y, %x
+ %add = add i128 %xor, %and
+ ret i128 %add
+}
+
+; This negative test case shouldn't combine, i32 is already properly
+; handled in terms of legalization, compared to the i128
+
+define i32 @avgflooru_i128_negative2(i32 %x, i32 %y) {
+; CHECK-LABEL: avgflooru_i128_negative2:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: shrq %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+start:
+ %xor = xor i32 %y, %x
+ %lshr = lshr i32 %xor, 1
+ %and = and i32 %y, %x
+ %add = add i32 %lshr, %and
+ ret i32 %add
+}
+
+define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
+; CHECK-LABEL: avgflooru_i128_vec:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r9
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT: movq %r10, %r14
+; CHECK-NEXT: xorq %rsi, %r14
+; CHECK-NEXT: movq %rbx, %r11
+; CHECK-NEXT: xorq %rdx, %r11
+; CHECK-NEXT: shrdq $1, %r11, %r14
+; CHECK-NEXT: andq %rdx, %rbx
+; CHECK-NEXT: shrq %r11
+; CHECK-NEXT: andq %rsi, %r10
+; CHECK-NEXT: addq %r14, %r10
+; CHECK-NEXT: adcq %rbx, %r11
+; CHECK-NEXT: movq %r9, %rdx
+; CHECK-NEXT: xorq %rcx, %rdx
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: xorq %r8, %rsi
+; CHECK-NEXT: shrdq $1, %rsi, %rdx
+; CHECK-NEXT: andq %r8, %rdi
+; CHECK-NEXT: shrq %rsi
+; CHECK-NEXT: andq %rcx, %r9
+; CHECK-NEXT: addq %rdx, %r9
+; CHECK-NEXT: adcq %rdi, %rsi
+; CHECK-NEXT: movq %r9, 16(%rax)
+; CHECK-NEXT: movq %r10, (%rax)
+; CHECK-NEXT: movq %rsi, 24(%rax)
+; CHECK-NEXT: movq %r11, 8(%rax)
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+start:
+ %xor = xor <2 x i128> %y, %x
+ %lshr = lshr <2 x i128> %xor, <i128 1, i128 1>
+ %and = and <2 x i128> %y, %x
+ %add = add <2 x i128> %lshr, %and
+ ret <2 x i128> %add
+}
>From 594ae95a4ed6788b1abe14f46a8c74946ade9050 Mon Sep 17 00:00:00 2001
From: medievalghoul <61852278+medievalghoul at users.noreply.github.com>
Date: Tue, 30 Jul 2024 14:46:01 -0400
Subject: [PATCH 2/5] test with opt
---
llvm/test/CodeGen/RISCV/avgflooru.ll | 48 +++++++------
llvm/test/CodeGen/X86/avgflooru-i128.ll | 83 ++++++++---------------
llvm/test/CodeGen/X86/avgflooru-scalar.ll | 52 ++++----------
3 files changed, 69 insertions(+), 114 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/avgflooru.ll b/llvm/test/CodeGen/RISCV/avgflooru.ll
index b58aaab6aaf4a..fa88c3760e455 100644
--- a/llvm/test/CodeGen/RISCV/avgflooru.ll
+++ b/llvm/test/CodeGen/RISCV/avgflooru.ll
@@ -164,18 +164,20 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
; RV32I-LABEL: test_fixed_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: and a4, a1, a3
-; RV32I-NEXT: xor a1, a1, a3
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: add a3, a4, a3
-; RV32I-NEXT: slli a1, a1, 31
-; RV32I-NEXT: xor a4, a0, a2
-; RV32I-NEXT: srli a4, a4, 1
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: and a2, a0, a2
-; RV32I-NEXT: add a0, a2, a1
+; RV32I-NEXT: add a4, a3, a1
+; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: sltu a1, a0, a2
-; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: add a2, a4, a1
+; RV32I-NEXT: beq a2, a3, .LBB6_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a1, a2, a3
+; RV32I-NEXT: .LBB6_2:
+; RV32I-NEXT: slli a1, a1, 31
+; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: slli a2, a2, 31
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_fixed_i64:
@@ -195,18 +197,20 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
; RV32I-LABEL: test_ext_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: and a4, a1, a3
-; RV32I-NEXT: xor a1, a1, a3
-; RV32I-NEXT: srli a3, a1, 1
-; RV32I-NEXT: add a3, a4, a3
-; RV32I-NEXT: slli a1, a1, 31
-; RV32I-NEXT: xor a4, a0, a2
-; RV32I-NEXT: srli a4, a4, 1
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: and a2, a0, a2
-; RV32I-NEXT: add a0, a2, a1
+; RV32I-NEXT: add a4, a3, a1
+; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: sltu a1, a0, a2
-; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: add a2, a4, a1
+; RV32I-NEXT: beq a2, a3, .LBB7_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a1, a2, a3
+; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: slli a1, a1, 31
+; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: slli a2, a2, 31
+; RV32I-NEXT: srli a0, a0, 1
+; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ext_i64:
diff --git a/llvm/test/CodeGen/X86/avgflooru-i128.ll b/llvm/test/CodeGen/X86/avgflooru-i128.ll
index a4c10f93a3193..da16a7da48ca6 100644
--- a/llvm/test/CodeGen/X86/avgflooru-i128.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-i128.ll
@@ -4,17 +4,13 @@
define i128 @avgflooru_i128(i128 %x, i128 %y) {
; CHECK-LABEL: avgflooru_i128:
; CHECK: # %bb.0: # %start
-; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: xorq %rdi, %rax
-; CHECK-NEXT: movq %rcx, %r8
-; CHECK-NEXT: xorq %rsi, %r8
-; CHECK-NEXT: shrdq $1, %r8, %rax
-; CHECK-NEXT: andq %rsi, %rcx
-; CHECK-NEXT: shrq %r8
-; CHECK-NEXT: andq %rdi, %rdx
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: adcq %rcx, %r8
-; CHECK-NEXT: movq %r8, %rdx
+; CHECK-NEXT: adcq %rcx, %rsi
+; CHECK-NEXT: setb %cl
+; CHECK-NEXT: shrdq $1, %rsi, %rax
+; CHECK-NEXT: movzbl %cl, %edx
+; CHECK-NEXT: shldq $63, %rsi, %rdx
; CHECK-NEXT: retq
start:
%xor = xor i128 %y, %x
@@ -52,12 +48,13 @@ define i128 @avgflooru_i128_multi_use(i128 %x, i128 %y) nounwind {
; CHECK-NEXT: movq %r13, %rdi
; CHECK-NEXT: movq %rbp, %rsi
; CHECK-NEXT: callq use at PLT
-; CHECK-NEXT: andq %r15, %rbx
-; CHECK-NEXT: andq %r12, %r14
-; CHECK-NEXT: addq %r13, %r14
-; CHECK-NEXT: adcq %rbp, %rbx
-; CHECK-NEXT: movq %r14, %rax
-; CHECK-NEXT: movq %rbx, %rdx
+; CHECK-NEXT: addq %r14, %r12
+; CHECK-NEXT: adcq %rbx, %r15
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: shrdq $1, %r15, %r12
+; CHECK-NEXT: movzbl %al, %edx
+; CHECK-NEXT: shldq $63, %r15, %rdx
+; CHECK-NEXT: movq %r12, %rax
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
@@ -121,45 +118,23 @@ start:
define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
; CHECK-LABEL: avgflooru_i128_vec:
; CHECK: # %bb.0: # %start
-; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: .cfi_offset %rbx, -24
-; CHECK-NEXT: .cfi_offset %r14, -16
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r9
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; CHECK-NEXT: movq %r10, %r14
-; CHECK-NEXT: xorq %rsi, %r14
-; CHECK-NEXT: movq %rbx, %r11
-; CHECK-NEXT: xorq %rdx, %r11
-; CHECK-NEXT: shrdq $1, %r11, %r14
-; CHECK-NEXT: andq %rdx, %rbx
-; CHECK-NEXT: shrq %r11
-; CHECK-NEXT: andq %rsi, %r10
-; CHECK-NEXT: addq %r14, %r10
-; CHECK-NEXT: adcq %rbx, %r11
-; CHECK-NEXT: movq %r9, %rdx
-; CHECK-NEXT: xorq %rcx, %rdx
-; CHECK-NEXT: movq %rdi, %rsi
-; CHECK-NEXT: xorq %r8, %rsi
-; CHECK-NEXT: shrdq $1, %rsi, %rdx
-; CHECK-NEXT: andq %r8, %rdi
-; CHECK-NEXT: shrq %rsi
-; CHECK-NEXT: andq %rcx, %r9
-; CHECK-NEXT: addq %rdx, %r9
-; CHECK-NEXT: adcq %rdi, %rsi
-; CHECK-NEXT: movq %r9, 16(%rax)
-; CHECK-NEXT: movq %r10, (%rax)
-; CHECK-NEXT: movq %rsi, 24(%rax)
-; CHECK-NEXT: movq %r11, 8(%rax)
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: popq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
+; CHECK-NEXT: setb %dil
+; CHECK-NEXT: movzbl %dil, %edi
+; CHECK-NEXT: shldq $63, %rdx, %rdi
+; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
+; CHECK-NEXT: setb %r9b
+; CHECK-NEXT: movzbl %r9b, %r9d
+; CHECK-NEXT: shldq $63, %r8, %r9
+; CHECK-NEXT: shldq $63, %rsi, %rdx
+; CHECK-NEXT: shldq $63, %rcx, %r8
+; CHECK-NEXT: movq %r8, 16(%rax)
+; CHECK-NEXT: movq %rdx, (%rax)
+; CHECK-NEXT: movq %r9, 24(%rax)
+; CHECK-NEXT: movq %rdi, 8(%rax)
; CHECK-NEXT: retq
start:
%xor = xor <2 x i128> %y, %x
diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
index d21c9d65ea9c8..0c91a9da5720a 100644
--- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
@@ -168,26 +168,14 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: test_fixed_i64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: xorl %edi, %edx
-; X86-NEXT: shrdl $1, %edx, %ebx
-; X86-NEXT: andl %edi, %ecx
-; X86-NEXT: shrl %edx
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %dl
+; X86-NEXT: movzbl %dl, %edx
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: shldl $31, %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i64:
@@ -208,26 +196,14 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: test_ext_i64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: xorl %edi, %edx
-; X86-NEXT: shrdl $1, %edx, %ebx
-; X86-NEXT: andl %edi, %ecx
-; X86-NEXT: shrl %edx
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: addl %ebx, %eax
-; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %dl
+; X86-NEXT: movzbl %dl, %edx
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: shldl $31, %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i64:
>From 8a4fe97c0310ace4708b4f1243a3464c7e251028 Mon Sep 17 00:00:00 2001
From: medievalghoul <61852278+medievalghoul at users.noreply.github.com>
Date: Tue, 30 Jul 2024 14:46:34 -0400
Subject: [PATCH 3/5] the optimization
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6fd23b5ab9f5f..7fa83a5999dfe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9379,6 +9379,26 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
}
}
+ // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
+ if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
+ SDValue UAddWithOverflow =
+ DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
+
+ SDValue Sum = UAddWithOverflow.getValue(0);
+ SDValue Overflow = UAddWithOverflow.getValue(1);
+
+ // Right shift the sum by 1
+ SDValue One = DAG.getShiftAmountConstant(1, VT, dl);
+ SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum, One);
+
+ SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
+ SDValue OverflowShl =
+ DAG.getNode(ISD::SHL, dl, VT, ZeroExtOverflow,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
+
+ return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
+ }
+
// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
>From 15d46178c1e8584be7a18f044b3b6be2e569ff69 Mon Sep 17 00:00:00 2001
From: medievalghoul <61852278+medievalghoul at users.noreply.github.com>
Date: Tue, 30 Jul 2024 15:05:10 -0400
Subject: [PATCH 4/5] custom test without opt
---
llvm/test/CodeGen/X86/avgflooru-i128.ll | 83 ++++++++++++++++---------
1 file changed, 54 insertions(+), 29 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avgflooru-i128.ll b/llvm/test/CodeGen/X86/avgflooru-i128.ll
index da16a7da48ca6..a4c10f93a3193 100644
--- a/llvm/test/CodeGen/X86/avgflooru-i128.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-i128.ll
@@ -4,13 +4,17 @@
define i128 @avgflooru_i128(i128 %x, i128 %y) {
; CHECK-LABEL: avgflooru_i128:
; CHECK: # %bb.0: # %start
-; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: xorq %rdi, %rax
+; CHECK-NEXT: movq %rcx, %r8
+; CHECK-NEXT: xorq %rsi, %r8
+; CHECK-NEXT: shrdq $1, %r8, %rax
+; CHECK-NEXT: andq %rsi, %rcx
+; CHECK-NEXT: shrq %r8
+; CHECK-NEXT: andq %rdi, %rdx
; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: adcq %rcx, %rsi
-; CHECK-NEXT: setb %cl
-; CHECK-NEXT: shrdq $1, %rsi, %rax
-; CHECK-NEXT: movzbl %cl, %edx
-; CHECK-NEXT: shldq $63, %rsi, %rdx
+; CHECK-NEXT: adcq %rcx, %r8
+; CHECK-NEXT: movq %r8, %rdx
; CHECK-NEXT: retq
start:
%xor = xor i128 %y, %x
@@ -48,13 +52,12 @@ define i128 @avgflooru_i128_multi_use(i128 %x, i128 %y) nounwind {
; CHECK-NEXT: movq %r13, %rdi
; CHECK-NEXT: movq %rbp, %rsi
; CHECK-NEXT: callq use at PLT
-; CHECK-NEXT: addq %r14, %r12
-; CHECK-NEXT: adcq %rbx, %r15
-; CHECK-NEXT: setb %al
-; CHECK-NEXT: shrdq $1, %r15, %r12
-; CHECK-NEXT: movzbl %al, %edx
-; CHECK-NEXT: shldq $63, %r15, %rdx
-; CHECK-NEXT: movq %r12, %rax
+; CHECK-NEXT: andq %r15, %rbx
+; CHECK-NEXT: andq %r12, %r14
+; CHECK-NEXT: addq %r13, %r14
+; CHECK-NEXT: adcq %rbp, %rbx
+; CHECK-NEXT: movq %r14, %rax
+; CHECK-NEXT: movq %rbx, %rdx
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
@@ -118,23 +121,45 @@ start:
define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
; CHECK-LABEL: avgflooru_i128_vec:
; CHECK: # %bb.0: # %start
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
-; CHECK-NEXT: setb %dil
-; CHECK-NEXT: movzbl %dil, %edi
-; CHECK-NEXT: shldq $63, %rdx, %rdi
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
-; CHECK-NEXT: setb %r9b
-; CHECK-NEXT: movzbl %r9b, %r9d
-; CHECK-NEXT: shldq $63, %r8, %r9
-; CHECK-NEXT: shldq $63, %rsi, %rdx
-; CHECK-NEXT: shldq $63, %rcx, %r8
-; CHECK-NEXT: movq %r8, 16(%rax)
-; CHECK-NEXT: movq %rdx, (%rax)
-; CHECK-NEXT: movq %r9, 24(%rax)
-; CHECK-NEXT: movq %rdi, 8(%rax)
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r9
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; CHECK-NEXT: movq %r10, %r14
+; CHECK-NEXT: xorq %rsi, %r14
+; CHECK-NEXT: movq %rbx, %r11
+; CHECK-NEXT: xorq %rdx, %r11
+; CHECK-NEXT: shrdq $1, %r11, %r14
+; CHECK-NEXT: andq %rdx, %rbx
+; CHECK-NEXT: shrq %r11
+; CHECK-NEXT: andq %rsi, %r10
+; CHECK-NEXT: addq %r14, %r10
+; CHECK-NEXT: adcq %rbx, %r11
+; CHECK-NEXT: movq %r9, %rdx
+; CHECK-NEXT: xorq %rcx, %rdx
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: xorq %r8, %rsi
+; CHECK-NEXT: shrdq $1, %rsi, %rdx
+; CHECK-NEXT: andq %r8, %rdi
+; CHECK-NEXT: shrq %rsi
+; CHECK-NEXT: andq %rcx, %r9
+; CHECK-NEXT: addq %rdx, %r9
+; CHECK-NEXT: adcq %rdi, %rsi
+; CHECK-NEXT: movq %r9, 16(%rax)
+; CHECK-NEXT: movq %r10, (%rax)
+; CHECK-NEXT: movq %rsi, 24(%rax)
+; CHECK-NEXT: movq %r11, 8(%rax)
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
start:
%xor = xor <2 x i128> %y, %x
>From 7a906be4aa6f9eea89c16e3d138bfc1eed19bf40 Mon Sep 17 00:00:00 2001
From: medievalghoul <61852278+medievalghoul at users.noreply.github.com>
Date: Tue, 30 Jul 2024 15:06:42 -0400
Subject: [PATCH 5/5] custom test with opt
---
llvm/test/CodeGen/X86/avgflooru-i128.ll | 83 +++++++++----------------
1 file changed, 29 insertions(+), 54 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avgflooru-i128.ll b/llvm/test/CodeGen/X86/avgflooru-i128.ll
index a4c10f93a3193..da16a7da48ca6 100644
--- a/llvm/test/CodeGen/X86/avgflooru-i128.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-i128.ll
@@ -4,17 +4,13 @@
define i128 @avgflooru_i128(i128 %x, i128 %y) {
; CHECK-LABEL: avgflooru_i128:
; CHECK: # %bb.0: # %start
-; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: xorq %rdi, %rax
-; CHECK-NEXT: movq %rcx, %r8
-; CHECK-NEXT: xorq %rsi, %r8
-; CHECK-NEXT: shrdq $1, %r8, %rax
-; CHECK-NEXT: andq %rsi, %rcx
-; CHECK-NEXT: shrq %r8
-; CHECK-NEXT: andq %rdi, %rdx
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: adcq %rcx, %r8
-; CHECK-NEXT: movq %r8, %rdx
+; CHECK-NEXT: adcq %rcx, %rsi
+; CHECK-NEXT: setb %cl
+; CHECK-NEXT: shrdq $1, %rsi, %rax
+; CHECK-NEXT: movzbl %cl, %edx
+; CHECK-NEXT: shldq $63, %rsi, %rdx
; CHECK-NEXT: retq
start:
%xor = xor i128 %y, %x
@@ -52,12 +48,13 @@ define i128 @avgflooru_i128_multi_use(i128 %x, i128 %y) nounwind {
; CHECK-NEXT: movq %r13, %rdi
; CHECK-NEXT: movq %rbp, %rsi
; CHECK-NEXT: callq use at PLT
-; CHECK-NEXT: andq %r15, %rbx
-; CHECK-NEXT: andq %r12, %r14
-; CHECK-NEXT: addq %r13, %r14
-; CHECK-NEXT: adcq %rbp, %rbx
-; CHECK-NEXT: movq %r14, %rax
-; CHECK-NEXT: movq %rbx, %rdx
+; CHECK-NEXT: addq %r14, %r12
+; CHECK-NEXT: adcq %rbx, %r15
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: shrdq $1, %r15, %r12
+; CHECK-NEXT: movzbl %al, %edx
+; CHECK-NEXT: shldq $63, %r15, %rdx
+; CHECK-NEXT: movq %r12, %rax
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
@@ -121,45 +118,23 @@ start:
define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
; CHECK-LABEL: avgflooru_i128_vec:
; CHECK: # %bb.0: # %start
-; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: .cfi_offset %rbx, -24
-; CHECK-NEXT: .cfi_offset %r14, -16
; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r9
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
-; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; CHECK-NEXT: movq %r10, %r14
-; CHECK-NEXT: xorq %rsi, %r14
-; CHECK-NEXT: movq %rbx, %r11
-; CHECK-NEXT: xorq %rdx, %r11
-; CHECK-NEXT: shrdq $1, %r11, %r14
-; CHECK-NEXT: andq %rdx, %rbx
-; CHECK-NEXT: shrq %r11
-; CHECK-NEXT: andq %rsi, %r10
-; CHECK-NEXT: addq %r14, %r10
-; CHECK-NEXT: adcq %rbx, %r11
-; CHECK-NEXT: movq %r9, %rdx
-; CHECK-NEXT: xorq %rcx, %rdx
-; CHECK-NEXT: movq %rdi, %rsi
-; CHECK-NEXT: xorq %r8, %rsi
-; CHECK-NEXT: shrdq $1, %rsi, %rdx
-; CHECK-NEXT: andq %r8, %rdi
-; CHECK-NEXT: shrq %rsi
-; CHECK-NEXT: andq %rcx, %r9
-; CHECK-NEXT: addq %rdx, %r9
-; CHECK-NEXT: adcq %rdi, %rsi
-; CHECK-NEXT: movq %r9, 16(%rax)
-; CHECK-NEXT: movq %r10, (%rax)
-; CHECK-NEXT: movq %rsi, 24(%rax)
-; CHECK-NEXT: movq %r11, 8(%rax)
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: popq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
+; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
+; CHECK-NEXT: setb %dil
+; CHECK-NEXT: movzbl %dil, %edi
+; CHECK-NEXT: shldq $63, %rdx, %rdi
+; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
+; CHECK-NEXT: setb %r9b
+; CHECK-NEXT: movzbl %r9b, %r9d
+; CHECK-NEXT: shldq $63, %r8, %r9
+; CHECK-NEXT: shldq $63, %rsi, %rdx
+; CHECK-NEXT: shldq $63, %rcx, %r8
+; CHECK-NEXT: movq %r8, 16(%rax)
+; CHECK-NEXT: movq %rdx, (%rax)
+; CHECK-NEXT: movq %r9, 24(%rax)
+; CHECK-NEXT: movq %rdi, 8(%rax)
; CHECK-NEXT: retq
start:
%xor = xor <2 x i128> %y, %x
More information about the llvm-commits
mailing list