[llvm] 82b7436 - [DAG] reassociateOpsCommutative - peek through bitcasts to find constants
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 11 04:00:37 PST 2021
Author: Simon Pilgrim
Date: 2021-11-11T12:00:22Z
New Revision: 82b74363a943b570c4ee7799d5f3ee4b3e7163a5
URL: https://github.com/llvm/llvm-project/commit/82b74363a943b570c4ee7799d5f3ee4b3e7163a5
DIFF: https://github.com/llvm/llvm-project/commit/82b74363a943b570c4ee7799d5f3ee4b3e7163a5.diff
LOG: [DAG] reassociateOpsCommutative - peek through bitcasts to find constants
Now that FoldConstantArithmetic can fold bitcasted constants, we should peek through bitcasts of binop operands to try to find foldable constants.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index be52fd25c006..60c5328d3e90 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1066,8 +1066,8 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
return DAG.getNode(Opc, DL, VT, N00, OpNode);
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
index 9650cfd69249..88ac75bb9faf 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
@@ -46,9 +46,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %b) {
; CHECK-LABEL: add_v2i32_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s6
-; CHECK-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-NEXT: vmov r1, s4
-; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
@@ -58,7 +56,8 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %b) {
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
-; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
@@ -427,7 +426,6 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q2, #0xffff
; CHECK-NEXT: vand q1, q1, q2
-; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: cmp r0, #0
@@ -439,12 +437,11 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %b) {
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
-; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vmov r0, r1, d1
-; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov r2, r1, d0
; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: orrs r1, r3
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i16> %b, zeroinitializer
@@ -1386,7 +1383,6 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
-; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: cmp r0, #0
@@ -1398,12 +1394,11 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %b) {
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
-; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vmov r0, r1, d1
-; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: vmov r2, r1, d0
; CHECK-NEXT: add r0, r2
-; CHECK-NEXT: orrs r1, r3
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i8> %b, zeroinitializer
@@ -1530,9 +1525,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %b,
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r2, s6
-; CHECK-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-NEXT: vmov r3, s4
-; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
@@ -1542,7 +1535,8 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %b,
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r3, ne
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
-; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
+; CHECK-NEXT: vand q0, q0, q1
+; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov lr, r12, d1
; CHECK-NEXT: vmov r3, r2, d0
@@ -1900,11 +1894,8 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i64 q2, #0xffff
; CHECK-NEXT: vand q1, q1, q2
-; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: cmp r2, #0
@@ -1916,15 +1907,14 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %b,
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r3, ne
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
-; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vmov r12, lr, d1
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: add r2, r12
-; CHECK-NEXT: orr.w r3, r3, lr
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r3, r12, d0
+; CHECK-NEXT: add r2, r3
; CHECK-NEXT: adds r0, r0, r2
-; CHECK-NEXT: adcs r1, r3
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: adc.w r1, r1, r12
+; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i16> %b, zeroinitializer
%xx = zext <2 x i16> %x to <2 x i64>
@@ -2600,11 +2590,8 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %b, i64 %a) {
; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
-; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: cmp r2, #0
@@ -2616,15 +2603,14 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %b, i6
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r3, ne
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
-; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; CHECK-NEXT: vand q0, q0, q1
-; CHECK-NEXT: vmov r12, lr, d1
-; CHECK-NEXT: vmov r2, r3, d0
-; CHECK-NEXT: add r2, r12
-; CHECK-NEXT: orr.w r3, r3, lr
+; CHECK-NEXT: vand q0, q0, q2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r3, r12, d0
+; CHECK-NEXT: add r2, r3
; CHECK-NEXT: adds r0, r0, r2
-; CHECK-NEXT: adcs r1, r3
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: adc.w r1, r1, r12
+; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i8> %b, zeroinitializer
%xx = zext <2 x i8> %x to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index 723eae9fb2f4..84858e56f876 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -1937,9 +1937,8 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
; X86-LABEL: test_mask_cmp_b_512:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: pushl %esi # encoding: [0x56]
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
@@ -1958,26 +1957,25 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwin
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
-; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
-; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
-; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
-; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
+; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
+; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
+; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
-; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
-; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
-; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
-; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
+; X86-NEXT: kmovd %k2, %esi # encoding: [0xc5,0xfb,0x93,0xf2]
+; X86-NEXT: adcl %edx, %esi # encoding: [0x11,0xd6]
; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
-; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
-; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
+; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
+; X86-NEXT: adcl %esi, %edx # encoding: [0x11,0xf2]
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x08]
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x0c]
; X86-NEXT: popl %esi # encoding: [0x5e]
-; X86-NEXT: popl %edi # encoding: [0x5f]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -2112,9 +2110,8 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
; X86-LABEL: test_mask_x86_avx512_ucmp_b_512:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: pushl %esi # encoding: [0x56]
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
@@ -2133,26 +2130,25 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
-; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
-; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
-; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
-; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
+; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
+; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
+; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
+; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
-; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
-; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
-; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
-; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
+; X86-NEXT: kmovd %k2, %esi # encoding: [0xc5,0xfb,0x93,0xf2]
+; X86-NEXT: adcl %edx, %esi # encoding: [0x11,0xd6]
; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
-; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
-; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
-; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
+; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
+; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
+; X86-NEXT: adcl %esi, %edx # encoding: [0x11,0xf2]
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x08]
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x0c]
; X86-NEXT: popl %esi # encoding: [0x5e]
-; X86-NEXT: popl %edi # encoding: [0x5f]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
More information about the llvm-commits mailing list