[llvm] r267209 - DAGCombiner: Relax alignment restriction when changing load type
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 22 13:21:36 PDT 2016
Author: arsenm
Date: Fri Apr 22 15:21:36 2016
New Revision: 267209
URL: http://llvm.org/viewvc/llvm-project?rev=267209&view=rev
Log:
DAGCombiner: Relax alignment restriction when changing load type
If the target allows the access at the load's original alignment (and reports it as fast), changing the load type should still be OK.
Added:
llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=267209&r1=267208&r2=267209&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri Apr 22 15:21:36 2016
@@ -268,8 +268,21 @@ public:
/// efficiently, casting the load to a smaller vector of larger types and
/// loading is more efficient, however, this can be undone by optimizations in
/// dag combiner.
- virtual bool isLoadBitCastBeneficial(EVT /* Load */,
- EVT /* Bitcast */) const {
+ virtual bool isLoadBitCastBeneficial(EVT LoadVT,
+ EVT BitcastVT) const {
+ // Don't do this if we could do an indexed load on the original type, but
+ // not on the new one.
+ if (!LoadVT.isSimple() || !BitcastVT.isSimple())
+ return true;
+
+ MVT LoadMVT = LoadVT.getSimpleVT();
+
+ // Don't bother doing this if it's just going to be promoted again later, as
+ // doing so might interfere with other combines.
+ if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
+ getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
+ return false;
+
return true;
}
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=267209&r1=267208&r2=267209&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Apr 22 15:21:36 2016
@@ -7366,11 +7366,12 @@ SDValue DAGCombiner::visitBITCAST(SDNode
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned Align = DAG.getDataLayout().getABITypeAlignment(
- VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
- if (Align <= OrigAlign) {
+ bool Fast = false;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ LN0->getAddressSpace(), OrigAlign, &Fast) &&
+ Fast) {
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
Added: llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll?rev=267209&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll Fri Apr 22 15:21:36 2016
@@ -0,0 +1,38 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; GCN-LABEL: {{^}}reduce_i64_load_align_4_width_to_i32:
+; GCN: buffer_load_dword [[VAL:v[0-9]+]]
+; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, [[VAL]]
+; GCN: buffer_store_dwordx2
+define void @reduce_i64_load_align_4_width_to_i32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+ %a = load i64, i64 addrspace(1)* %in, align 4
+ %and = and i64 %a, 1234567
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt0:
+; GCN: buffer_load_dword [[VAL:v[0-9]+]]
+; GCN: buffer_store_dword [[VAL]]
+define void @reduce_i64_align_4_bitcast_v2i32_elt0(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+ %a = load i64, i64 addrspace(1)* %in, align 4
+ %vec = bitcast i64 %a to <2 x i32>
+ %elt0 = extractelement <2 x i32> %vec, i32 0
+ store i32 %elt0, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1:
+; GCN: buffer_load_dword [[VAL:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
+; GCN: buffer_store_dword [[VAL]]
+define void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+ %a = load i64, i64 addrspace(1)* %in, align 4
+ %vec = bitcast i64 %a to <2 x i32>
+ %elt0 = extractelement <2 x i32> %vec, i32 1
+ store i32 %elt0, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=267209&r1=267208&r2=267209&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Fri Apr 22 15:21:36 2016
@@ -53,7 +53,7 @@ define void @mask16_mem(i16* %ptr) {
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL: ## BB#0:
-; KNL-NEXT: movb (%rdi), %al
+; KNL-NEXT: movzbw (%rdi), %ax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
@@ -951,7 +951,7 @@ define <16 x i32> @load_16i1(<16 x i1>*
define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL-LABEL: load_2i1:
; KNL: ## BB#0:
-; KNL-NEXT: movb (%rdi), %al
+; KNL-NEXT: movzbw (%rdi), %ax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: retq
@@ -969,7 +969,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a)
define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL-LABEL: load_4i1:
; KNL: ## BB#0:
-; KNL-NEXT: movb (%rdi), %al
+; KNL-NEXT: movzbw (%rdi), %ax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=267209&r1=267208&r2=267209&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Fri Apr 22 15:21:36 2016
@@ -291,7 +291,7 @@ define <8 x i32> @test7(i32* %base, <8 x
; KNL_32-LABEL: test7:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; KNL_32-NEXT: movzbw {{[0-9]+}}(%esp), %cx
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
; KNL_32-NEXT: kmovw %k1, %k2
Modified: llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll?rev=267209&r1=267208&r2=267209&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/merge-consecutive-loads-512.ll Fri Apr 22 15:21:36 2016
@@ -234,7 +234,7 @@ define <8 x i64> @merge_8i64_i64_1u3u5zu
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovdqu32 8(%eax), %zmm0
+; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0
; X32-AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
More information about the llvm-commits mailing list