[PATCH] R600: Compute masked bits for min and max

Wed Mar 26 15:10:28 PDT 2014

http://llvm-reviews.chandlerc.com/D3195

Files:
  lib/Target/R600/AMDGPUISelLowering.cpp
  test/CodeGen/R600/llvm.AMDGPU.umax.ll
  test/CodeGen/R600/llvm.AMDGPU.umin.ll

Index: lib/Target/R600/AMDGPUISelLowering.cpp
===================================================================

--- lib/Target/R600/AMDGPUISelLowering.cpp
+++ lib/Target/R600/AMDGPUISelLowering.cpp
@@ -1177,11 +1177,55 @@
   }
 }
 
+static void computeMaskedBitsForMinMax(const SDValue Op0,
+                                       const SDValue Op1,
+                                       APInt &KnownZero,
+                                       APInt &KnownOne,
+                                       const SelectionDAG &DAG,
+                                       unsigned Depth) {
+  APInt Op0Zero, Op0One;
+  APInt Op1Zero, Op1One;
+  DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth);
+  DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth);
+
+  KnownZero = Op0Zero & Op1Zero;
+  KnownOne = Op0One & Op1One;
+}
+
 void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
   const SDValue Op,
   APInt &KnownZero,
   APInt &KnownOne,
   const SelectionDAG &DAG,
   unsigned Depth) const {
+
   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+  unsigned Opc = Op.getOpcode();
+  switch (Opc) {
+  case ISD::INTRINSIC_WO_CHAIN: {
+    // FIXME: The intrinsic should just use the node.
+    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+    case AMDGPUIntrinsic::AMDGPU_imax:
+    case AMDGPUIntrinsic::AMDGPU_umax:
+    case AMDGPUIntrinsic::AMDGPU_imin:
+    case AMDGPUIntrinsic::AMDGPU_umin:
+      computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
+                                 KnownZero, KnownOne, DAG, Depth);
+      break;
+    default:
+      break;
+    }
+
+    break;
+  }
+  case AMDGPUISD::SMAX:
+  case AMDGPUISD::UMAX:
+  case AMDGPUISD::SMIN:
+  case AMDGPUISD::UMIN:
+    computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
+                               KnownZero, KnownOne, DAG, Depth);
+    break;
+  default:
+    break;
+  }
 }
Index: test/CodeGen/R600/llvm.AMDGPU.umax.ll
===================================================================
--- test/CodeGen/R600/llvm.AMDGPU.umax.ll
+++ test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -21,6 +21,21 @@
   ret void
 }
 
+; SI-LABEL: @trunc_zext_umax
+; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
+; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: AND
+; SI: BUFFER_STORE_SHORT [[RESULT]],
+define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
+  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp2 = zext i8 %tmp5 to i32
+  %tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
+  %tmp4 = trunc i32 %tmp3 to i8
+  %tmp6 = zext i8 %tmp4 to i16
+  store i16 %tmp6, i16 addrspace(1)* %out, align 2
+  ret void
+}
+
 ; Function Attrs: readnone
 declare i32 @llvm.AMDGPU.umax(i32, i32) #1
 
Index: test/CodeGen/R600/llvm.AMDGPU.umin.ll
===================================================================
--- test/CodeGen/R600/llvm.AMDGPU.umin.ll
+++ test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -21,6 +21,21 @@
   ret void
 }
 
+; SI-LABEL: @trunc_zext_umin
+; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
+; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: AND
+; SI: BUFFER_STORE_SHORT [[RESULT]],
+define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
+  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp2 = zext i8 %tmp5 to i32
+  %tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
+  %tmp4 = trunc i32 %tmp3 to i8
+  %tmp6 = zext i8 %tmp4 to i16
+  store i16 %tmp6, i16 addrspace(1)* %out, align 2
+  ret void
+}
+
 ; Function Attrs: readnone
 declare i32 @llvm.AMDGPU.umin(i32, i32) #1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D3195.1.patch
Type: text/x-patch
Size: 3716 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140326/cea27b11/attachment.bin>