[PATCH] D31400: DAG: Fix mis-legalization of i1 zextload

Mon Mar 27 09:52:50 PDT 2017

arsenm created this revision.
Herald added subscribers: nhaehnle, wdng.

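The high bits of the loaded value aren't guaranteed to be zero, so the
legalizer now emits an explicit zero-extend-in-register (a mask of the
result down to the source type) instead of an AssertZext node.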


https://reviews.llvm.org/D31400

Files:
  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
  test/CodeGen/AMDGPU/ctlz.ll
  test/CodeGen/AMDGPU/load-local-i1.ll
  test/CodeGen/AMDGPU/udiv.ll


Index: test/CodeGen/AMDGPU/udiv.ll
===================================================================

--- test/CodeGen/AMDGPU/udiv.ll
+++ test/CodeGen/AMDGPU/udiv.ll
@@ -128,8 +128,11 @@
 }
 
 ; FUNC-LABEL: {{^}}v_udiv_i23:
+; SI: s_mov_b32 [[MASK:s[0-9]+]], 0x7fffff
+; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],
+; SI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]],
 ; SI: v_rcp_f32
-; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
+; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], [[MASK]], v{{[0-9]+}}
 ; SI: buffer_store_dword [[TRUNC]]
 define amdgpu_kernel void @v_udiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* %in) {
   %den_ptr = getelementptr i23, i23 addrspace(1)* %in, i23 1
Index: test/CodeGen/AMDGPU/load-local-i1.ll
===================================================================
--- test/CodeGen/AMDGPU/load-local-i1.ll
+++ test/CodeGen/AMDGPU/load-local-i1.ll
@@ -66,8 +66,9 @@
 }
 
 ; FUNC-LABEL: {{^}}local_zextload_i1_to_i32:
-; GCN: ds_read_u8
-; GCN: ds_write_b32
+; GCN: ds_read_u8 [[LOAD:v[0-9]+]]
+; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 1, [[LOAD]]
+; GCN: ds_write_b32 v{{[0-9]+}}, [[AND]]
 define amdgpu_kernel void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 {
   %a = load i1, i1 addrspace(3)* %in
   %ext = zext i1 %a to i32
Index: test/CodeGen/AMDGPU/ctlz.ll
===================================================================
--- test/CodeGen/AMDGPU/ctlz.ll
+++ test/CodeGen/AMDGPU/ctlz.ll
@@ -255,10 +255,14 @@
 }
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
+; FIXME: Why difference with VI?
 ; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1:
-; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
+; GCN-DAG: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
+; SI-DAG: s_movk_i32 [[MASK:s[0-9]+]], 0x7f
+; VI: v_and_b32_e32 [[VAL]], 0x7f, [[VAL]]
 ; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
-; GCN: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
+; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], [[MASK]], [[FFBH]]
+; VI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
 ; GCN: {{buffer|flat}}_store_byte [[TRUNC]],
 define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x()
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -700,11 +700,9 @@
       Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
                            Result.getValueType(),
                            Result, DAG.getValueType(SrcVT));
-    else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
-      // All the top bits are guaranteed to be zero - inform the optimizers.
-      Result = DAG.getNode(ISD::AssertZext, dl,
-                           Result.getValueType(), Result,
-                           DAG.getValueType(SrcVT));
+    else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) {
+      Result = DAG.getZeroExtendInReg(Result, dl, SrcVT);
+    }
 
     Value = Result;
     Chain = Ch;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D31400.93146.patch
Type: text/x-patch
Size: 3159 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170327/18f6a827/attachment.bin>