[llvm] f096d59 - [DAG] Fix SDLoc mismatch in (shl (srl x, c1), c2) -> and(shift(x,c3)) fold
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 15 03:08:08 PDT 2022
Author: Simon Pilgrim
Date: 2022-06-15T11:07:59+01:00
New Revision: f096d5926ddc19030550f10589c3c219f340da6c
URL: https://github.com/llvm/llvm-project/commit/f096d5926ddc19030550f10589c3c219f340da6c
DIFF: https://github.com/llvm/llvm-project/commit/f096d5926ddc19030550f10589c3c219f340da6c.diff
LOG: [DAG] Fix SDLoc mismatch in (shl (srl x, c1), c2) -> and(shift(x,c3)) fold
Noticed by @craig.topper on D125836, which uses a tweaked copy of the same code.
Differential Revision: https://reviews.llvm.org/D127772
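
As context (not part of the patch), here is a minimal LLVM IR sketch of the second branch of the fold touched below, using assumed constants c1 = 3 and c2 = 5 on i32 to mirror the shift-mask.ll changes; names and constants are illustrative only:

    define i32 @shl_lshr_sketch(i32 %x) {
      %srl = lshr i32 %x, 3
      %shl = shl i32 %srl, 5
      ; DAGCombiner rewrites the DAG for %shl roughly as
      ;   (and (shl %x, 2), -32)   ; 2 == c2 - c1, -32 == -1 << c2
      ; This commit makes the new AND node use the same SDLoc (DL) as the
      ; SHL/SRL and mask nodes created alongside it, instead of SDLoc(N0).
      ret i32 %shl
    }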
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
llvm/test/CodeGen/AMDGPU/load-lo16.ll
llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll
llvm/test/CodeGen/X86/combine-shl.ll
llvm/test/CodeGen/X86/rotate-extract.ll
llvm/test/CodeGen/X86/shift-mask.ll
llvm/test/CodeGen/X86/sse2-vector-shifts.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 14b10a5448c7..7e293bad2bf3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8956,7 +8956,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, Mask);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
/*AllowUndefs*/ false,
@@ -8966,7 +8966,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue Mask = DAG.getAllOnesConstant(DL, VT);
Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, Mask);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
index fd2c131aaa71..d17ed9302fb5 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
@@ -262,8 +262,8 @@ define amdgpu_kernel void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; SI-NEXT: v_add_i32_e32 v3, vcc, 9, v0
; SI-NEXT: v_and_b32_e32 v2, 0xff00, v0
-; SI-NEXT: v_and_b32_e32 v4, 0xff00, v1
; SI-NEXT: v_and_b32_e32 v3, 0xff, v3
+; SI-NEXT: v_and_b32_e32 v4, 0xff00, v1
; SI-NEXT: v_add_i32_e32 v1, vcc, 9, v1
; SI-NEXT: v_or_b32_e32 v2, v2, v3
; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
@@ -347,8 +347,8 @@ define amdgpu_kernel void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %o
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; SI-NEXT: v_add_i32_e32 v3, vcc, 9, v0
; SI-NEXT: v_and_b32_e32 v2, 0xff00, v0
-; SI-NEXT: v_and_b32_e32 v4, 0xff00, v1
; SI-NEXT: v_and_b32_e32 v3, 0xff, v3
+; SI-NEXT: v_and_b32_e32 v4, 0xff00, v1
; SI-NEXT: v_add_i32_e32 v1, vcc, 9, v1
; SI-NEXT: v_or_b32_e32 v2, v2, v3
; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
diff --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
index 1ac91c35b9c8..5765b105ce35 100644
--- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
@@ -595,12 +595,12 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_hi(i16 addrspace(3)* %in,
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
-; GFX803-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
-; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX803-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX803-NEXT: v_mov_b32_e32 v3, 0
-; GFX803-NEXT: ds_write_b16 v3, v1
+; GFX803-NEXT: ds_write_b16 v3, v2
; GFX803-NEXT: s_waitcnt lgkmcnt(1)
-; GFX803-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX803-NEXT: v_or_b32_e32 v0, v0, v1
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
@@ -647,12 +647,12 @@ define void @load_local_lo_v2i16_reghi_vreg_multi_use_lohi(i16 addrspace(3)* noa
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_mov_b32 m0, -1
; GFX803-NEXT: ds_read_u16 v0, v0
-; GFX803-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
-; GFX803-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX803-NEXT: v_lshrrev_b32_e32 v4, 16, v1
+; GFX803-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX803-NEXT: s_waitcnt lgkmcnt(0)
; GFX803-NEXT: ds_write_b16 v2, v0
-; GFX803-NEXT: ds_write_b16 v3, v1
-; GFX803-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX803-NEXT: ds_write_b16 v3, v4
+; GFX803-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX803-NEXT: flat_store_dword v[0:1], v0
; GFX803-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX803-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
index f6cecaf9c792..321c7edfd7b0 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -581,13 +581,13 @@ define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i1
; CI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
; CI-NEXT: s_mov_b64 s[2:3], s[6:7]
; CI-NEXT: s_waitcnt vmcnt(0)
-; CI-NEXT: v_lshrrev_b32_e32 v4, 8, v3
-; CI-NEXT: v_lshlrev_b32_e32 v3, 8, v3
-; CI-NEXT: v_and_b32_e32 v4, 0xff00, v4
-; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; CI-NEXT: v_lshlrev_b32_e32 v4, 8, v3
+; CI-NEXT: v_lshrrev_b32_e32 v3, 8, v3
; CI-NEXT: v_and_b32_e32 v3, 0xff00, v3
-; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; CI-NEXT: v_or_b32_e32 v3, v3, v4
+; CI-NEXT: v_lshlrev_b32_e32 v2, 8, v2
+; CI-NEXT: v_and_b32_e32 v4, 0xff00, v4
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v3, v4, v3
; CI-NEXT: v_and_b32_e32 v2, 0xff00ff00, v2
; CI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; CI-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll b/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll
index 5c36163f0407..affcd6f402fd 100644
--- a/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll
+++ b/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -8,9 +8,9 @@ define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblo
; CHECK-NEXT: movl %edx, %edx
; CHECK-NEXT: movl (%rdi,%rdx,4), %edx
; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: addl $4, %eax
; CHECK-NEXT: shrq $6, %rdx
; CHECK-NEXT: andl $67108860, %edx # imm = 0x3FFFFFC
-; CHECK-NEXT: addl $4, %eax
; CHECK-NEXT: movl (%rdi,%rdx), %edx
; CHECK-NEXT: movzbl %dl, %edi
; CHECK-NEXT: shrl $8, %edx
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index 2fc75062bfe8..c4916b167845 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -337,8 +337,8 @@ define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr0:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
@@ -347,8 +347,8 @@ define <8 x i32> @combine_vec_shl_zext_lshr0(<8 x i16> %x) {
; SSE41-LABEL: combine_vec_shl_zext_lshr0:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
@@ -368,8 +368,8 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE2-LABEL: combine_vec_shl_zext_lshr1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
@@ -378,8 +378,8 @@ define <8 x i32> @combine_vec_shl_zext_lshr1(<8 x i16> %x) {
; SSE41-LABEL: combine_vec_shl_zext_lshr1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll
index 54de5c2cafc3..901379b8d6df 100644
--- a/llvm/test/CodeGen/X86/rotate-extract.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract.ll
@@ -165,18 +165,18 @@ define i32 @no_extract_shrl(i32 %i) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: andl $-8, %ecx
-; X86-NEXT: shll $25, %ecx
-; X86-NEXT: shrl $9, %eax
+; X86-NEXT: shrl $9, %ecx
+; X86-NEXT: andl $-8, %eax
+; X86-NEXT: shll $25, %eax
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: no_extract_shrl:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $-8, %eax
-; X64-NEXT: shll $25, %eax
-; X64-NEXT: shrl $9, %edi
+; X64-NEXT: shrl $9, %eax
+; X64-NEXT: andl $-8, %edi
+; X64-NEXT: shll $25, %edi
; X64-NEXT: orl %edi, %eax
; X64-NEXT: retq
%lhs_div = lshr i32 %i, 3
diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll
index 05cb78c1083e..4f3c1ac18667 100644
--- a/llvm/test/CodeGen/X86/shift-mask.ll
+++ b/llvm/test/CodeGen/X86/shift-mask.ll
@@ -258,11 +258,11 @@ define i64 @test_i64_shl_lshr_0(i64 %a0) {
define i64 @test_i64_shl_lshr_1(i64 %a0) {
; X86-LABEL: test_i64_shl_lshr_1:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: leal (,%ecx,4), %eax
+; X86-NEXT: shldl $2, %eax, %edx
+; X86-NEXT: shll $2, %eax
; X86-NEXT: andl $-32, %eax
-; X86-NEXT: shldl $2, %ecx, %edx
; X86-NEXT: retl
;
; X64-MASK-LABEL: test_i64_shl_lshr_1:
@@ -288,8 +288,8 @@ define i64 @test_i64_shl_lshr_2(i64 %a0) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: shrdl $2, %edx, %eax
-; X86-NEXT: andl $-8, %eax
; X86-NEXT: shrl $2, %edx
+; X86-NEXT: andl $-8, %eax
; X86-NEXT: retl
;
; X64-MASK-LABEL: test_i64_shl_lshr_2:
diff --git a/llvm/test/CodeGen/X86/sse2-vector-shifts.ll b/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
index 7e6c92d00656..0a7cd3392e66 100644
--- a/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
@@ -321,8 +321,8 @@ define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
; CHECK-LABEL: shl_zext_srl_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: retq
%srl = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>