[PATCH] D30215: DAG: Fold bitcast/extract_vector_elt of undef to undef

Mon Mar 20 16:57:20 PDT 2017

Matt Arsenault via Phabricator <reviews at reviews.llvm.org> writes:
> arsenm created this revision.
> Herald added subscribers: nhaehnle, wdng.
>
> Fixes the store not being eliminated when an intrinsic is lowered to undef.

LGTM. One minor, optional comment below - take it or leave it as you see fit.

> https://reviews.llvm.org/D30215
>
> Files:
>   lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>   test/CodeGen/AMDGPU/bitcast-vector-extract.ll
>   test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
>
> Index: test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
> ===================================================================
> --- test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
> +++ test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
> @@ -22,8 +22,7 @@
>  
>  ; GCN-LABEL: {{^}}v_fract_undef_f32:
>  ; GCN-NOT: v_fract_f32
> -; GCN-NOT: v0
> -; GCN: buffer_store_dword v0
> +; GCN-NOT: store_dword
>  define void @v_fract_undef_f32(float addrspace(1)* %out) #1 {
>    %fract = call float @llvm.amdgcn.fract.f32(float undef)
>    store float %fract, float addrspace(1)* %out
> Index: test/CodeGen/AMDGPU/bitcast-vector-extract.ll
> ===================================================================
> --- test/CodeGen/AMDGPU/bitcast-vector-extract.ll
> +++ test/CodeGen/AMDGPU/bitcast-vector-extract.ll
> @@ -67,3 +67,27 @@
>    store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
>    ret void
>  }
> +
> +; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
> +; GCN-NOT: store_dword
> +define void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
> +  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
> +  %bc = bitcast i64 %undef to <2 x i32>
> +  store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
> +  ret void
> +}
> +
> +; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
> +; GCN-NOT: store_dword
> +define void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
> +  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
> +  %bc = bitcast i64 %undef to <2 x i32>
> +  %elt1 = extractelement <2 x i32> %bc, i32 1
> +  store volatile i32 %elt1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
> +
> +attributes #0 = { nounwind }
> +attributes #1 = { nounwind readnone convergent }
> Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> ===================================================================
> --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> @@ -8014,6 +8014,9 @@
>        return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
>    }
>  
> +  if (N0.isUndef())
> +    return DAG.getUNDEF(VT);
> +
>    // If the input is a constant, let getNode fold it.
>    if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
>      // If we can't allow illegal operations, we need to check that this is just
> @@ -12978,6 +12981,9 @@
>      return InOp;
>    }
>  
> +  if (InVec.isUndef())
> +    return DAG.getUNDEF(NVT);

Both of these undef checks could be moved a little earlier in their
respective functions if you want - undef is neither a BUILD_VECTOR nor a
SCALAR_TO_VECTOR, so the earlier checks for those opcodes will always be
false for an undef input by the time we get here.

> +
>    SDValue EltNo = N->getOperand(1);
>    ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);