[llvm] fed9433 - Revert "[DAG] Reducing instructions by better legalization handling of AVGFLOORU for illegal data types (#99913)"
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Sat Jul 27 18:36:04 PDT 2024
Author: Craig Topper
Date: 2024-07-27T18:35:44-07:00
New Revision: fed94333fd54ec6012386b7b7977c3226ebfd2ac
URL: https://github.com/llvm/llvm-project/commit/fed94333fd54ec6012386b7b7977c3226ebfd2ac
DIFF: https://github.com/llvm/llvm-project/commit/fed94333fd54ec6012386b7b7977c3226ebfd2ac.diff
LOG: Revert "[DAG] Reducing instructions by better legalization handling of AVGFLOORU for illegal data types (#99913)"
This reverts commit d5521d128494690be66e03a674b9d1181935bf77.
The AArch64 test is failing on the bots.
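For context, the reverted combine expanded AVGFLOORU for illegal scalar
types as or(lshr(add(lhs, rhs), 1), shl(overflow, typesize - 1)): perform
the add with overflow, shift the truncated sum right by one, and OR the
carry back in as the new top bit. A minimal standalone C++ sketch of that
identity (an illustration only, not the LLVM code; uint32_t stands in for
a type that would be illegal on a narrower target):

#include <cassert>
#include <cstdint>

// Floor average without widening, via the add-with-overflow trick.
uint32_t avgflooru(uint32_t lhs, uint32_t rhs) {
  uint32_t sum = lhs + rhs;       // truncated add
  uint32_t overflow = sum < lhs;  // carry out of the add
  // Shift right by one and reinsert the carry as the new top bit.
  return (sum >> 1) | (overflow << 31);
}

int main() {
  // Matches the reference form (x & y) + ((x ^ y) >> 1) used in the tests.
  uint32_t x = 0xFFFFFFFFu, y = 0xFFFFFFFDu;
  assert(avgflooru(x, y) == (x & y) + ((x ^ y) >> 1));
}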
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/RISCV/avgflooru.ll
llvm/test/CodeGen/X86/avgflooru-scalar.ll
Removed:
llvm/test/CodeGen/AArch64/avgflooru-i128.ll
llvm/test/CodeGen/X86/avgflooru-i128.ll
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7fa83a5999dfe..6fd23b5ab9f5f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9379,26 +9379,6 @@ SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
}
}
- // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
- if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
- SDValue UAddWithOverflow =
- DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
-
- SDValue Sum = UAddWithOverflow.getValue(0);
- SDValue Overflow = UAddWithOverflow.getValue(1);
-
- // Right shift the sum by 1
- SDValue One = DAG.getShiftAmountConstant(1, VT, dl);
- SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum, One);
-
- SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
- SDValue OverflowShl =
- DAG.getNode(ISD::SHL, dl, VT, ZeroExtOverflow,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
-
- return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
- }
-
// avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
// avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
// avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
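The comments kept above describe the generic expansions that remain after
the revert. As a quick check, the unsigned ceiling and floor identities
they describe in plain C++, again only a hedged sketch with 32-bit
operands:

#include <cassert>
#include <cstdint>

// avgceilu: sub(or(lhs, rhs), lshr(xor(lhs, rhs), 1))
uint32_t avgceilu(uint32_t lhs, uint32_t rhs) {
  return (lhs | rhs) - ((lhs ^ rhs) >> 1);
}

// avgflooru: add(and(lhs, rhs), lshr(xor(lhs, rhs), 1))
uint32_t avgflooru_ref(uint32_t lhs, uint32_t rhs) {
  return (lhs & rhs) + ((lhs ^ rhs) >> 1);
}

int main() {
  assert(avgceilu(7, 8) == 8);       // ceil(15 / 2)
  assert(avgflooru_ref(7, 8) == 7);  // floor(15 / 2)
}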
diff --git a/llvm/test/CodeGen/AArch64/avgflooru-i128.ll b/llvm/test/CodeGen/AArch64/avgflooru-i128.ll
deleted file mode 100644
index d336c38f20799..0000000000000
--- a/llvm/test/CodeGen/AArch64/avgflooru-i128.ll
+++ /dev/null
@@ -1,124 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
-
-define i128 @avgflooru_i128(i128 %x, i128 %y) {
-; CHECK-LABEL: avgflooru_i128:
-; CHECK: // %bb.0: // %start
-; CHECK-NEXT: adds x8, x0, x2
-; CHECK-NEXT: adcs x9, x1, x3
-; CHECK-NEXT: cset w10, hs
-; CHECK-NEXT: extr x0, x9, x8, #1
-; CHECK-NEXT: extr x1, x10, x9, #1
-; CHECK-NEXT: ret
-start:
- %xor = xor i128 %y, %x
- %lshr = lshr i128 %xor, 1
- %and = and i128 %y, %x
- %add = add i128 %lshr, %and
- ret i128 %add
-}
-
-declare void @use(i8)
-
-define i128 @avgflooru_i128_multi_use(i128 %x, i128 %y) nounwind {
-; CHECK-LABEL: avgflooru_i128_multi_use:
-; CHECK: // %bb.0: // %start
-; CHECK-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: eor x23, x3, x1
-; CHECK-NEXT: eor x24, x2, x0
-; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov x21, x1
-; CHECK-NEXT: mov x22, x0
-; CHECK-NEXT: mov x0, x24
-; CHECK-NEXT: mov x1, x23
-; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: mov x19, x3
-; CHECK-NEXT: mov x20, x2
-; CHECK-NEXT: bl use
-; CHECK-NEXT: extr x0, x23, x24, #1
-; CHECK-NEXT: lsr x1, x23, #1
-; CHECK-NEXT: bl use
-; CHECK-NEXT: adds x8, x22, x20
-; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: adcs x9, x21, x19
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: cset w10, hs
-; CHECK-NEXT: extr x0, x9, x8, #1
-; CHECK-NEXT: extr x1, x10, x9, #1
-; CHECK-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-start:
- %xor = xor i128 %y, %x
- call void @use(i128 %xor)
- %lshr = lshr i128 %xor, 1
- call void @use(i128 %lshr)
- %and = and i128 %y, %x
- %add = add i128 %lshr, %and
- ret i128 %add
-}
-
-; the `avgflooru_i128_negative` test shouldn't combine because it's not
-; an avgflooru operation, which is what we're targeting
-
-define i128 @avgflooru_i128_negative(i128 %x, i128 %y) {
-; CHECK-LABEL: avgflooru_i128_negative:
-; CHECK: // %bb.0: // %start
-; CHECK-NEXT: mvn x8, x0
-; CHECK-NEXT: and x9, x2, x0
-; CHECK-NEXT: mvn x10, x1
-; CHECK-NEXT: and x11, x3, x1
-; CHECK-NEXT: adds x0, x8, x9
-; CHECK-NEXT: adc x1, x10, x11
-; CHECK-NEXT: ret
-start:
- %xor = xor i128 %x, -1
- %and = and i128 %y, %x
- %add = add i128 %xor, %and
- ret i128 %add
-}
-
-; This negative test case shouldn't combine; i32 is already properly
-; handled by legalization, unlike the illegal i128
-
-define i32 @avgflooru_i128_negative2(i32 %x, i32 %y) {
-; CHECK-LABEL: avgflooru_i128_negative2:
-; CHECK: // %bb.0: // %start
-; CHECK-NEXT: mov w8, w1
-; CHECK-NEXT: add x8, x8, w0, uxtw
-; CHECK-NEXT: lsr x0, x8, #1
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
-start:
- %xor = xor i32 %y, %x
- %lshr = lshr i32 %xor, 1
- %and = and i32 %y, %x
- %add = add i32 %lshr, %and
- ret i32 %add
-}
-
-define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
-; CHECK-LABEL: avgflooru_i128_vec:
-; CHECK: // %bb.0: // %start
-; CHECK-NEXT: adds x8, x0, x4
-; CHECK-NEXT: adcs x9, x1, x5
-; CHECK-NEXT: cset w10, hs
-; CHECK-NEXT: adds x11, x2, x6
-; CHECK-NEXT: extr x0, x9, x8, #1
-; CHECK-NEXT: adcs x12, x3, x7
-; CHECK-NEXT: extr x1, x10, x9, #1
-; CHECK-NEXT: extr x11, x12, x11, #1
-; CHECK-NEXT: cset w13, hs
-; CHECK-NEXT: extr x3, x13, x12, #1
-; CHECK-NEXT: fmov d0, x11
-; CHECK-NEXT: mov v0.d[1], x3
-; CHECK-NEXT: fmov x2, d0
-; CHECK-NEXT: ret
-start:
- %xor = xor <2 x i128> %y, %x
- %lshr = lshr <2 x i128> %xor, <i128 1, i128 1>
- %and = and <2 x i128> %y, %x
- %add = add <2 x i128> %lshr, %and
- ret <2 x i128> %add
-}
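The deleted AArch64 checks above show the i128 case lowering to an
adds/adcs pair for the wide sum, a cset capturing the final carry, and
extr funnel shifts halving the 129-bit result. A rough standalone C++
equivalent of that multi-word shape (a sketch assuming two 64-bit halves,
not the actual lowering):

#include <cassert>
#include <cstdint>

struct u128 { uint64_t lo, hi; };

// 128-bit floor average from 64-bit halves, mirroring adds/adcs/cset/extr.
u128 avgflooru_u128(u128 x, u128 y) {
  uint64_t lo = x.lo + y.lo;
  uint64_t c0 = lo < x.lo;                        // carry out of the low add
  uint64_t hi = x.hi + y.hi + c0;
  uint64_t c1 = hi < x.hi || (c0 && hi == x.hi);  // carry out of the high add
  // Funnel-shift the 129-bit sum (c1:hi:lo) right by one, as extr does.
  return { (lo >> 1) | (hi << 63), (hi >> 1) | (c1 << 63) };
}

int main() {
  u128 ones = { ~0ull, ~0ull };                   // x = y = 2^128 - 1
  u128 avg = avgflooru_u128(ones, ones);
  assert(avg.lo == ~0ull && avg.hi == ~0ull);     // avg(x, x) == x
}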
diff --git a/llvm/test/CodeGen/RISCV/avgflooru.ll b/llvm/test/CodeGen/RISCV/avgflooru.ll
index fa88c3760e455..b58aaab6aaf4a 100644
--- a/llvm/test/CodeGen/RISCV/avgflooru.ll
+++ b/llvm/test/CodeGen/RISCV/avgflooru.ll
@@ -164,20 +164,18 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
; RV32I-LABEL: test_fixed_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: add a4, a3, a1
-; RV32I-NEXT: add a0, a2, a0
-; RV32I-NEXT: sltu a1, a0, a2
-; RV32I-NEXT: add a2, a4, a1
-; RV32I-NEXT: beq a2, a3, .LBB6_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a1, a2, a3
-; RV32I-NEXT: .LBB6_2:
+; RV32I-NEXT: and a4, a1, a3
+; RV32I-NEXT: xor a1, a1, a3
+; RV32I-NEXT: srli a3, a1, 1
+; RV32I-NEXT: add a3, a4, a3
; RV32I-NEXT: slli a1, a1, 31
-; RV32I-NEXT: srli a3, a2, 1
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: slli a2, a2, 31
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: xor a4, a0, a2
+; RV32I-NEXT: srli a4, a4, 1
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: add a0, a2, a1
+; RV32I-NEXT: sltu a1, a0, a2
+; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_fixed_i64:
@@ -197,20 +195,18 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
; RV32I-LABEL: test_ext_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: add a4, a3, a1
-; RV32I-NEXT: add a0, a2, a0
-; RV32I-NEXT: sltu a1, a0, a2
-; RV32I-NEXT: add a2, a4, a1
-; RV32I-NEXT: beq a2, a3, .LBB7_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a1, a2, a3
-; RV32I-NEXT: .LBB7_2:
+; RV32I-NEXT: and a4, a1, a3
+; RV32I-NEXT: xor a1, a1, a3
+; RV32I-NEXT: srli a3, a1, 1
+; RV32I-NEXT: add a3, a4, a3
; RV32I-NEXT: slli a1, a1, 31
-; RV32I-NEXT: srli a3, a2, 1
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: slli a2, a2, 31
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: xor a4, a0, a2
+; RV32I-NEXT: srli a4, a4, 1
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: and a2, a0, a2
+; RV32I-NEXT: add a0, a2, a1
+; RV32I-NEXT: sltu a1, a0, a2
+; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ext_i64:
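After the revert, the RV32 output above goes back to the xor/and form
computed across two 32-bit registers: the halved xor's bit that crosses
the half boundary moves via slli/srli/or, and the carry between halves is
recovered with sltu. A hedged C++ sketch of that half-wise shape (the
helper name is mine, not from the test):

#include <cassert>
#include <cstdint>

// Floor average of an i64 split into 32-bit halves, per the RV32I sequence.
uint64_t avgflooru_i64_halves(uint32_t xlo, uint32_t xhi,
                              uint32_t ylo, uint32_t yhi) {
  uint32_t xl = xlo ^ ylo, xh = xhi ^ yhi;
  uint32_t shl = (xl >> 1) | (xh << 31);  // low half of (x ^ y) >> 1
  uint32_t shh = xh >> 1;                 // high half of (x ^ y) >> 1
  uint32_t lo = (xlo & ylo) + shl;
  uint32_t carry = lo < shl;              // the sltu in the asm
  uint32_t hi = (xhi & yhi) + shh + carry;
  return ((uint64_t)hi << 32) | lo;
}

int main() {
  uint64_t x = 0x0123456789ABCDEFull, y = 0xFEDCBA9876543210ull;
  uint64_t ref = (x & y) + ((x ^ y) >> 1);
  assert(avgflooru_i64_halves((uint32_t)x, (uint32_t)(x >> 32),
                              (uint32_t)y, (uint32_t)(y >> 32)) == ref);
}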
diff --git a/llvm/test/CodeGen/X86/avgflooru-i128.ll b/llvm/test/CodeGen/X86/avgflooru-i128.ll
deleted file mode 100644
index da16a7da48ca6..0000000000000
--- a/llvm/test/CodeGen/X86/avgflooru-i128.ll
+++ /dev/null
@@ -1,145 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
-
-define i128 @avgflooru_i128(i128 %x, i128 %y) {
-; CHECK-LABEL: avgflooru_i128:
-; CHECK: # %bb.0: # %start
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: adcq %rcx, %rsi
-; CHECK-NEXT: setb %cl
-; CHECK-NEXT: shrdq $1, %rsi, %rax
-; CHECK-NEXT: movzbl %cl, %edx
-; CHECK-NEXT: shldq $63, %rsi, %rdx
-; CHECK-NEXT: retq
-start:
- %xor = xor i128 %y, %x
- %lshr = lshr i128 %xor, 1
- %and = and i128 %y, %x
- %add = add i128 %lshr, %and
- ret i128 %add
-}
-
-declare void @use(i8)
-
-define i128 @avgflooru_i128_multi_use(i128 %x, i128 %y) nounwind {
-; CHECK-LABEL: avgflooru_i128_multi_use:
-; CHECK: # %bb.0: # %start
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r15
-; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r13
-; CHECK-NEXT: pushq %r12
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: movq %rcx, %rbx
-; CHECK-NEXT: movq %rdx, %r14
-; CHECK-NEXT: movq %rsi, %r15
-; CHECK-NEXT: movq %rdi, %r12
-; CHECK-NEXT: movq %rdx, %r13
-; CHECK-NEXT: xorq %rdi, %r13
-; CHECK-NEXT: movq %rcx, %rbp
-; CHECK-NEXT: xorq %rsi, %rbp
-; CHECK-NEXT: movq %r13, %rdi
-; CHECK-NEXT: movq %rbp, %rsi
-; CHECK-NEXT: callq use@PLT
-; CHECK-NEXT: shrdq $1, %rbp, %r13
-; CHECK-NEXT: shrq %rbp
-; CHECK-NEXT: movq %r13, %rdi
-; CHECK-NEXT: movq %rbp, %rsi
-; CHECK-NEXT: callq use@PLT
-; CHECK-NEXT: addq %r14, %r12
-; CHECK-NEXT: adcq %rbx, %r15
-; CHECK-NEXT: setb %al
-; CHECK-NEXT: shrdq $1, %r15, %r12
-; CHECK-NEXT: movzbl %al, %edx
-; CHECK-NEXT: shldq $63, %r15, %rdx
-; CHECK-NEXT: movq %r12, %rax
-; CHECK-NEXT: addq $8, %rsp
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
-; CHECK-NEXT: popq %r13
-; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %r15
-; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: retq
-start:
- %xor = xor i128 %y, %x
- call void @use(i128 %xor)
- %lshr = lshr i128 %xor, 1
- call void @use(i128 %lshr)
- %and = and i128 %y, %x
- %add = add i128 %lshr, %and
- ret i128 %add
-}
-
-; This test case shouldn't combine because it's not
-; an avgflooru operation
-
-define i128 @avgflooru_i128_negative(i128 %x, i128 %y) {
-; CHECK-LABEL: avgflooru_i128_negative:
-; CHECK: # %bb.0: # %start
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: andq %rsi, %rcx
-; CHECK-NEXT: notq %rsi
-; CHECK-NEXT: andq %rdi, %rdx
-; CHECK-NEXT: notq %rax
-; CHECK-NEXT: addq %rdx, %rax
-; CHECK-NEXT: adcq %rcx, %rsi
-; CHECK-NEXT: movq %rsi, %rdx
-; CHECK-NEXT: retq
-start:
- %xor = xor i128 %x, -1
- %and = and i128 %y, %x
- %add = add i128 %xor, %and
- ret i128 %add
-}
-
-; This negative test case shouldn't combine; i32 is already properly
-; handled by legalization, unlike the illegal i128
-
-define i32 @avgflooru_i128_negative2(i32 %x, i32 %y) {
-; CHECK-LABEL: avgflooru_i128_negative2:
-; CHECK: # %bb.0: # %start
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: addq %rcx, %rax
-; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
-; CHECK-NEXT: retq
-start:
- %xor = xor i32 %y, %x
- %lshr = lshr i32 %xor, 1
- %and = and i32 %y, %x
- %add = add i32 %lshr, %and
- ret i32 %add
-}
-
-define <2 x i128> @avgflooru_i128_vec(<2 x i128> %x, <2 x i128> %y) {
-; CHECK-LABEL: avgflooru_i128_vec:
-; CHECK: # %bb.0: # %start
-; CHECK-NEXT: movq %rdi, %rax
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rsi
-; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %rdx
-; CHECK-NEXT: setb %dil
-; CHECK-NEXT: movzbl %dil, %edi
-; CHECK-NEXT: shldq $63, %rdx, %rdi
-; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: adcq {{[0-9]+}}(%rsp), %r8
-; CHECK-NEXT: setb %r9b
-; CHECK-NEXT: movzbl %r9b, %r9d
-; CHECK-NEXT: shldq $63, %r8, %r9
-; CHECK-NEXT: shldq $63, %rsi, %rdx
-; CHECK-NEXT: shldq $63, %rcx, %r8
-; CHECK-NEXT: movq %r8, 16(%rax)
-; CHECK-NEXT: movq %rdx, (%rax)
-; CHECK-NEXT: movq %r9, 24(%rax)
-; CHECK-NEXT: movq %rdi, 8(%rax)
-; CHECK-NEXT: retq
-start:
- %xor = xor <2 x i128> %y, %x
- %lshr = lshr <2 x i128> %xor, <i128 1, i128 1>
- %and = and <2 x i128> %y, %x
- %add = add <2 x i128> %lshr, %and
- ret <2 x i128> %add
-}
diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
index 0c91a9da5720a..d21c9d65ea9c8 100644
--- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
@@ -168,14 +168,26 @@ define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: test_fixed_i64:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setb %dl
-; X86-NEXT: movzbl %dl, %edx
-; X86-NEXT: shldl $31, %eax, %edx
-; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: shrdl $1, %edx, %ebx
+; X86-NEXT: andl %edi, %ecx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: test_fixed_i64:
@@ -196,14 +208,26 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: test_ext_i64:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setb %dl
-; X86-NEXT: movzbl %dl, %edx
-; X86-NEXT: shldl $31, %eax, %edx
-; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: shrdl $1, %edx, %ebx
+; X86-NEXT: andl %edi, %ecx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: test_ext_i64: