[PATCH] D30215: DAG: Fold bitcast/extract_vector_elt of undef to undef
Justin Bogner via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 20 16:57:20 PDT 2017
Matt Arsenault via Phabricator <reviews at reviews.llvm.org> writes:
> arsenm created this revision.
> Herald added subscribers: nhaehnle, wdng.
>
> Fixes the store not being eliminated when an intrinsic is lowered to undef.
LGTM. One minor comment below; feel free to act on it or not.
> https://reviews.llvm.org/D30215
>
> Files:
> lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> test/CodeGen/AMDGPU/bitcast-vector-extract.ll
> test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
>
> Index: test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
> ===================================================================
> --- test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
> +++ test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
> @@ -22,8 +22,7 @@
>
> ; GCN-LABEL: {{^}}v_fract_undef_f32:
> ; GCN-NOT: v_fract_f32
> -; GCN-NOT: v0
> -; GCN: buffer_store_dword v0
> +; GCN-NOT: store_dword
> define void @v_fract_undef_f32(float addrspace(1)* %out) #1 {
> %fract = call float @llvm.amdgcn.fract.f32(float undef)
> store float %fract, float addrspace(1)* %out
> Index: test/CodeGen/AMDGPU/bitcast-vector-extract.ll
> ===================================================================
> --- test/CodeGen/AMDGPU/bitcast-vector-extract.ll
> +++ test/CodeGen/AMDGPU/bitcast-vector-extract.ll
> @@ -67,3 +67,27 @@
> store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
> ret void
> }
> +
> +; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
> +; GCN-NOT: store_dword
> +define void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
> + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
> + %bc = bitcast i64 %undef to <2 x i32>
> + store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
> + ret void
> +}
> +
> +; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
> +; GCN-NOT: store_dword
> +define void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
> + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
> + %bc = bitcast i64 %undef to <2 x i32>
> + %elt1 = extractelement <2 x i32> %bc, i32 1
> + store volatile i32 %elt1, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
> +
> +attributes #0 = { nounwind }
> +attributes #1 = { nounwind readnone convergent }
> Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> @@ -8014,6 +8014,9 @@
> return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
> }
>
> + if (N0.isUndef())
> + return DAG.getUNDEF(VT);
> +
> // If the input is a constant, let getNode fold it.
> if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
> // If we can't allow illegal operations, we need to check that this is just
> @@ -12978,6 +12981,9 @@
> return InOp;
> }
>
> + if (InVec.isUndef())
> + return DAG.getUNDEF(NVT);
Both of these undef checks could be moved a little earlier in their
respective functions if you want - obviously undef is neither BUILD_VECTOR
nor SCALAR_TO_VECTOR, so the checks for those opcodes that precede them
will always be false for an undef operand by the time we get here.
> +
> SDValue EltNo = N->getOperand(1);
> ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
More information about the llvm-commits
mailing list