[PATCH] D59556: [AMDGPU] Fixed i64 add/sub used in lowering of i64 srem

Tue Mar 19 13:07:38 PDT 2019

tpr created this revision.
Herald added subscribers: llvm-commits, t-tye, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.

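My commit rL356399 <https://reviews.llvm.org/rL356399> "[AMDGPU] Asm/disasm clamp modifier on vop3 int arithmetic"
broke a case of i64 srem being lowered. This patch fixes it: when selecting
V_ADD_I32_e64/V_SUB_I32_e64 for UADDO/USUBO, the clamp bit operand is now
created with getTargetConstant instead of getConstant. It also adds a
regression test, test/CodeGen/AMDGPU/sremi64.ll.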

Change-Id: Id274ae6ac3c8687a23999ea239f383b37d812fab


Repository:
  rL LLVM

https://reviews.llvm.org/D59556

Files:
  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
  test/CodeGen/AMDGPU/sremi64.ll


Index: test/CodeGen/AMDGPU/sremi64.ll
===================================================================

--- /dev/null
+++ test/CodeGen/AMDGPU/sremi64.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs <%s | FileCheck -check-prefixes=GCN,GFX678 %s
+; RUN: llc -march=amdgcn -mcpu=gfx704 -verify-machineinstrs <%s | FileCheck -check-prefixes=GCN,GFX678 %s
+; RUN: llc -march=amdgcn -mcpu=gfx802 -verify-machineinstrs <%s | FileCheck -check-prefixes=GCN,GFX678 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs <%s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: wrapper:
+; GFX678: v_add_{{[iu]}}32_e32
+; GFX9: v_add_co_u32_e32
+; GFX678: v_addc_u32_e32
+; GFX9: v_addc_co_u32_e32
+
+; This was lowering the i64 srem with V_ADD_I32_e64 etc ops without the required clamp bit operand.
+
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg)
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1 immarg, i1 immarg)
+declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg, i1 immarg)
+
+define amdgpu_gs void @wrapper(i32 inreg %arg4) {
+main_body:
+  %tmp = call nsz float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 4, i32 0)
+  %tmp14 = bitcast float %tmp to i32
+  %tmp15 = insertelement <2 x i32> undef, i32 %tmp14, i32 1
+  %tmp16 = bitcast <2 x i32> %tmp15 to i64
+  %tmp17 = call nsz float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 32, i32 0)
+  %tmp18 = bitcast float %tmp17 to i32
+  %tmp19 = insertelement <2 x i32> undef, i32 %tmp18, i32 0
+  %tmp20 = insertelement <2 x i32> %tmp19, i32 undef, i32 1
+  %tmp21 = bitcast <2 x i32> %tmp20 to i64
+  %tmp22 = srem i64 %tmp16, %tmp21
+  %tmp23 = icmp eq i64 %tmp22, 0
+  %tmp24 = icmp eq i64 0, 0
+  %tmp25 = icmp eq i64 0, 0
+  %tmp26 = and i1 %tmp24, %tmp25
+  %tmp27 = and i1 %tmp23, %tmp26
+  %tmp28 = select i1 %tmp27, i32 0, i32 1065353216
+  call void @llvm.amdgcn.tbuffer.store.i32(i32 undef, <4 x i32> undef, i32 0, i32 64, i32 %arg4, i32 0, i32 4, i32 4, i1 true, i1 true)
+  call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp28, <4 x i32> undef, i32 0, i32 56, i32 %arg4, i32 0, i32 4, i32 4, i1 true, i1 true)
+  ret void
+}
+
Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -931,9 +931,10 @@
   unsigned Opc = N->getOpcode() == ISD::UADDO ?
     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
 
-  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
-                       {N->getOperand(0), N->getOperand(1),
-                        CurDAG->getConstant(0, {}, MVT::i1)/*clamp bit*/});
+  CurDAG->SelectNodeTo(
+      N, Opc, N->getVTList(),
+      {N->getOperand(0), N->getOperand(1),
+       CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
 }
 
 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D59556.191377.patch
Type: text/x-patch
Size: 3010 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190319/162d1765/attachment.bin>