[llvm] e4d0e12 - [DAG] Fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2) (REAPPLIED)
Simon Pilgrim via llvm-commits
llvm-commits@lists.llvm.org
Wed Sep 6 05:19:57 PDT 2023
Author: Simon Pilgrim
Date: 2023-09-06T13:19:42+01:00
New Revision: e4d0e1209934ee8885fb4c3f046f9aa29c660d55
URL: https://github.com/llvm/llvm-project/commit/e4d0e1209934ee8885fb4c3f046f9aa29c660d55
DIFF: https://github.com/llvm/llvm-project/commit/e4d0e1209934ee8885fb4c3f046f9aa29c660d55.diff
LOG: [DAG] Fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2) (REAPPLIED)
Assuming the ADD is nsw, it may be sign-extended and merged with a SHL op, in a fold similar to the existing (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) fold. An IR-level sketch of the transform follows below.
This is most useful for exposing address math on X86, but it also affects several AArch64 test cases.
Alive2: https://alive2.llvm.org/ce/z/2UpSbJ
Differential Revision: https://reviews.llvm.org/D159198
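For illustration, here is the fold at the IR level, using the extendedLeftShiftintToint64By4 test below as the example (the combine itself operates on SelectionDAG nodes, not IR, and the post-fold value names are illustrative):

Before:

    %inc  = add nsw i32 %a, 1
    %conv = sext i32 %inc to i64
    %shl  = shl i64 %conv, 4        ; (shl (sext (add_nsw %a, 1)), 4)

After (IR equivalent of the combined DAG):

    %conv = sext i32 %a to i64
    %shl  = shl i64 %conv, 4
    %res  = add i64 %shl, 16        ; (add (shl (sext %a), 4), 1 << 4)

The nsw flag guarantees sext(%a + 1) == sext(%a) + 1, after which the shift distributes over the add; see the Alive2 proof above.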
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
llvm/test/CodeGen/X86/addr-mode-matcher-2.ll
llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d2df87b6f85a23..408bd5ae3aa9a1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10009,6 +10009,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
+ // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
+ // TODO: Add zext/add_nuw variant with suitable test coverage
+ // TODO: Should we limit this with isLegalAddImmediate?
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::ADD &&
+ N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
+ N0.getOperand(0)->hasOneUse() &&
+ TLI.isDesirableToCommuteWithShift(N, Level)) {
+ SDValue Add = N0.getOperand(0);
+ SDLoc DL(N0);
+ if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
+ {Add.getOperand(1)})) {
+ if (SDValue ShlC =
+ DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
+ SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
+ SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
+ return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
+ }
+ }
+ }
+
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
SDValue N01 = N0.getOperand(1);
diff --git a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
index ff762920f746c8..da6499b7daa82e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -275,8 +275,9 @@ entry:
define i64 @extendedLeftShiftintToint64By4(i32 %a) nounwind readnone ssp {
; CHECK-LABEL: extendedLeftShiftintToint64By4:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: add w8, w0, #1
-; CHECK-NEXT: sbfiz x0, x8, #4, #32
+; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sbfiz x8, x0, #4, #32
+; CHECK-NEXT: add x0, x8, #16
; CHECK-NEXT: ret
entry:
%inc = add nsw i32 %a, 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
index 31a649ad64f448..cd47fff46729f9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
@@ -20,10 +20,10 @@ define void @fct32(i32 %arg, i64 %var) {
; CHECK-LABEL: fct32:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: adrp x8, :got:zptr32
-; CHECK-NEXT: sub w9, w0, #1
; CHECK-NEXT: ldr x8, [x8, :got_lo12:zptr32]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str w1, [x8, w9, sxtw #2]
+; CHECK-NEXT: add x8, x8, w0, sxtw #2
+; CHECK-NEXT: stur w1, [x8, #-4]
; CHECK-NEXT: ret
bb:
%.pre37 = load ptr, ptr @zptr32, align 8
@@ -39,10 +39,10 @@ define void @fct16(i32 %arg, i64 %var) {
; CHECK-LABEL: fct16:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: adrp x8, :got:zptr16
-; CHECK-NEXT: sub w9, w0, #1
; CHECK-NEXT: ldr x8, [x8, :got_lo12:zptr16]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: strh w1, [x8, w9, sxtw #1]
+; CHECK-NEXT: add x8, x8, w0, sxtw #1
+; CHECK-NEXT: sturh w1, [x8, #-2]
; CHECK-NEXT: ret
bb:
%.pre37 = load ptr, ptr @zptr16, align 8
diff --git a/llvm/test/CodeGen/X86/addr-mode-matcher-2.ll b/llvm/test/CodeGen/X86/addr-mode-matcher-2.ll
index 1cfeaf01cd8a00..daba729bf040f2 100644
--- a/llvm/test/CodeGen/X86/addr-mode-matcher-2.ll
+++ b/llvm/test/CodeGen/X86/addr-mode-matcher-2.ll
@@ -52,8 +52,8 @@ define void @foo_sext_nsw(i1 zeroext, i32) nounwind {
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; X64-NEXT: cltq
-; X64-NEXT: leaq 4(,%rax,4), %rax
-; X64-NEXT: leaq (%rax,%rax,4), %rdi
+; X64-NEXT: shlq $2, %rax
+; X64-NEXT: leaq 20(%rax,%rax,4), %rdi
; X64-NEXT: callq bar@PLT
; X64-NEXT: jmp .LBB0_2
br i1 %0, label %9, label %3
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll
index 434799dc886365..f7bdaf39051dfd 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/flat_atomic.ll
@@ -8,18 +8,17 @@ declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr nocapture, double) #
define protected amdgpu_kernel void @InferNothing(i32 %a, ptr %b, double %c) {
; CHECK-LABEL: InferNothing:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c
; CHECK-NEXT: s_load_dword s2, s[0:1], 0x24
+; CHECK-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_ashr_i32 s3, s2, 31
+; CHECK-NEXT: s_lshl_b64 s[0:1], s[2:3], 3
+; CHECK-NEXT: s_add_u32 s0, s0, s4
+; CHECK-NEXT: s_addc_u32 s1, s1, s5
; CHECK-NEXT: v_mov_b32_e32 v0, s6
-; CHECK-NEXT: s_add_i32 s0, s2, -1
-; CHECK-NEXT: s_ashr_i32 s1, s0, 31
-; CHECK-NEXT: s_lshl_b64 s[0:1], s[0:1], 3
-; CHECK-NEXT: s_add_u32 s0, s4, s0
-; CHECK-NEXT: s_addc_u32 s1, s5, s1
; CHECK-NEXT: v_mov_b32_e32 v1, s7
; CHECK-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
-; CHECK-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
+; CHECK-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] offset:65528
; CHECK-NEXT: s_endpgm
entry:
%i = add nsw i32 %a, -1