[llvm] 2778f9a - [DAG] SimplifyDemandedVectorElts - attempt to handle ADD(x,x) as single use
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 26 02:35:48 PST 2021
Author: Simon Pilgrim
Date: 2021-11-26T10:32:10Z
New Revision: 2778f9a9f6d8a0f62854906fa203843e20e26d7d
URL: https://github.com/llvm/llvm-project/commit/2778f9a9f6d8a0f62854906fa203843e20e26d7d
DIFF: https://github.com/llvm/llvm-project/commit/2778f9a9f6d8a0f62854906fa203843e20e26d7d.diff
LOG: [DAG] SimplifyDemandedVectorElts - attempt to handle ADD(x,x) as single use
If the ADD node is the only user of the repeated operand x, then treat x as having a single use - this allows us to peek through ADD(x,x), i.e. shl(x,1), patterns.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll
llvm/test/CodeGen/X86/rotate_vec.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 234370bf0e057..3d08f7e59ecc7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2863,9 +2863,19 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO: There are more binop opcodes that could be handled here - MIN,
// MAX, saturated math, etc.
+ case ISD::ADD: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1, /*AssumeSingleUse*/ true))
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+ }
case ISD::OR:
case ISD::XOR:
- case ISD::ADD:
case ISD::SUB:
case ISD::FADD:
case ISD::FSUB:
diff --git a/llvm/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll b/llvm/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll
index 4f6055dee62ac..bc1a58536c0f4 100644
--- a/llvm/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll
+++ b/llvm/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=armv7 | FileCheck %s
;
; Ensure that don't crash given a largeish power-of-two shufflevector index.
@@ -7,12 +8,9 @@
define i32 @foo(%struct.desc* %descs, i32 %num, i32 %cw) local_unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: mov r1, #32
-; CHECK-NEXT: vld1.32 {d16, d17}, [r0], r1
-; CHECK-NEXT: vld1.32 {d18, d19}, [r0]
-; CHECK-NEXT: vtrn.32 q8, q9
+; CHECK-NEXT: vldr d16, [r0, #32]
; CHECK-NEXT: vadd.i32 d16, d16, d16
-; CHECK-NEXT: vmov.32 r0, d16[1]
+; CHECK-NEXT: vmov.32 r0, d16[0]
; CHECK-NEXT: bx lr
entry:
%descs.vec = bitcast %struct.desc* %descs to <16 x i32>*
diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll
index c8a03098ebccb..c0992e558998c 100644
--- a/llvm/test/CodeGen/X86/rotate_vec.ll
+++ b/llvm/test/CodeGen/X86/rotate_vec.ll
@@ -138,7 +138,7 @@ define <4 x i32> @rot_v4i32_mask_ashr0(<4 x i32> %a0) {
define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) {
; XOPAVX1-LABEL: rot_v4i32_mask_ashr1:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vpshad {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vpsrad $25, %xmm0, %xmm0
; XOPAVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; XOPAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -146,7 +146,7 @@ define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) {
;
; XOPAVX2-LABEL: rot_v4i32_mask_ashr1:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: vpsrad $25, %xmm0, %xmm0
; XOPAVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; XOPAVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; XOPAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -154,7 +154,7 @@ define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) {
;
; AVX512-LABEL: rot_v4i32_mask_ashr1:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vpsrad $25, %xmm0, %xmm0
; AVX512-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
More information about the llvm-commits mailing list