[llvm] dcfaaef - [SDAG] Add test for DAGCombiner multiple result commutative CSE
Itay Bookstein via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 16 00:39:29 PDT 2022
Author: Itay Bookstein
Date: 2022-07-16T10:39:17+03:00
New Revision: dcfaaef105dc5a193568c7d0b25199408a38da21
URL: https://github.com/llvm/llvm-project/commit/dcfaaef105dc5a193568c7d0b25199408a38da21
DIFF: https://github.com/llvm/llvm-project/commit/dcfaaef105dc5a193568c7d0b25199408a38da21.diff
LOG: [SDAG] Add test for DAGCombiner multiple result commutative CSE
This commit adds a test for DAGCombiner commutative CSE on
nodes with multiple results (UMUL_LOHI). As committed, the test
asserts the lack of CSE; a later commit will demonstrate the
CSE via the changed assertions.
Signed-off-by: Itay Bookstein <ibookstein at gmail.com>
Reviewed By: barannikov88
Differential Revision: https://reviews.llvm.org/D129905
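
For context (an illustrative sketch, not part of the commit): UMUL_LOHI is the
SelectionDAG node that produces both halves of a full-width unsigned multiply,
and it is commutative, so two nodes that differ only in operand order compute
the same {lo, hi} pair. The C analogue below (using the GCC/Clang __int128
extension; the helper name is made up for illustration) shows why such nodes
are candidates for CSE:

#include <stdint.h>
#include <stdio.h>

/* Sketch of what ISD::UMUL_LOHI computes: both halves of the full-width
   unsigned product of two 64-bit operands. */
static void umul_lohi_u64(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi) {
  unsigned __int128 p = (unsigned __int128)a * b;
  *lo = (uint64_t)p;          /* low 64 bits of the product */
  *hi = (uint64_t)(p >> 64);  /* high 64 bits of the product */
}

int main(void) {
  uint64_t lo1, hi1, lo2, hi2;
  umul_lohi_u64(3, 0xFFFFFFFFFFFFFFFFull, &lo1, &hi1);
  umul_lohi_u64(0xFFFFFFFFFFFFFFFFull, 3, &lo2, &hi2);
  /* Commutativity: both calls yield the same {lo, hi} pair, which is why a
     combiner may CSE two UMUL_LOHI nodes that differ only in operand order. */
  printf("%d\n", lo1 == lo2 && hi1 == hi2);
  return 0;
}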
Added:
Modified:
llvm/test/CodeGen/X86/dagcombine-cse.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/dagcombine-cse.ll b/llvm/test/CodeGen/X86/dagcombine-cse.ll
index 367724c28e07..0b02ce732820 100644
--- a/llvm/test/CodeGen/X86/dagcombine-cse.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-cse.ll
@@ -41,3 +41,118 @@ entry:
%tmp48 = extractelement <4 x i32> %tmp47, i32 0 ; <i32> [#uses=1]
ret i32 %tmp48
}
+
+; Test CSE for SDAG nodes with multiple results (UMUL_LOHI).
+; Pre-committed before fix, so this does not demonstrate CSE yet.
+define i96 @square_high(i96 %x) nounwind {
+; X86-LABEL: square_high:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: adcl $0, %eax
+; X86-NEXT: setb %bl
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: mull %ebp
+; X86-NEXT: addl %ecx, %edi
+; X86-NEXT: adcl %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movzbl %bl, %ecx
+; X86-NEXT: adcl %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: movl %ebp, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl (%esp), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %ebp, %esi
+; X86-NEXT: setb %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %eax
+; X86-NEXT: movl %eax, %ebp
+; X86-NEXT: addl %esi, %ebp
+; X86-NEXT: movzbl %bl, %esi
+; X86-NEXT: adcl %edx, %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Folded Reload
+; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %edi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: addl (%esp), %ebx ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; X86-NEXT: adcl %esi, %ebx
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: mull %eax
+; X86-NEXT: addl %eax, %edi
+; X86-NEXT: adcl %edx, %ecx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; X64-LABEL: square_high:
+; X64: ## %bb.0: ## %entry
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: movq %rdx, %r8
+; X64-NEXT: movq %rax, %rsi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rdi
+; X64-NEXT: addq %rdx, %rsi
+; X64-NEXT: adcq $0, %r8
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: addq %rsi, %rax
+; X64-NEXT: adcq %r8, %rdx
+; X64-NEXT: imulq %rcx, %rcx
+; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: shrdq $32, %rdx, %rax
+; X64-NEXT: shrq $32, %rdx
+; X64-NEXT: retq
+entry:
+ %conv = zext i96 %x to i192
+ %mul = mul nuw i192 %conv, %conv
+ %shr = lshr i192 %mul, 96
+ %conv2 = trunc i192 %shr to i96
+ ret i96 %conv2
+}
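
For reference (a scaled-down sketch, not part of the commit): the IR above
computes the high 96 bits of the 192-bit square of %x. The same computation at
64/128 bits, using the GCC/Clang __int128 extension and a made-up helper name,
looks like this:

#include <stdint.h>
#include <stdio.h>

/* 64-bit analogue of the i96 test: zero-extend, square at double width, and
   keep only the high half of the product. */
static uint64_t square_high_u64(uint64_t x) {
  unsigned __int128 sq = (unsigned __int128)x * x;  /* full 128-bit square */
  return (uint64_t)(sq >> 64);                      /* high 64 bits */
}

int main(void) {
  printf("%llu\n", (unsigned long long)square_high_u64(0xFFFFFFFFFFFFFFFFull));
  return 0;
}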