[llvm] r215732 - R600/SI: Implement isLegalAddressingMode

Matt Arsenault Matthew.Arsenault at amd.com
Fri Aug 15 10:17:07 PDT 2014


Author: arsenm
Date: Fri Aug 15 12:17:07 2014
New Revision: 215732

URL: http://llvm.org/viewvc/llvm-project?rev=215732&view=rev
Log:
R600/SI: Implement isLegalAddressingMode

The default assumes that a 16-bit signed offset is used.
LDS instructions use a 16-bit unsigned offset, so the offset
field wasn't being used in some cases where it was assumed a
negative offset could be used.

More should be done here, but first isLegalAddressingMode needs
to gain an address space argument. For now, copy most of the rest
of the default implementation with the immediate offset change.
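
A rough standalone sketch of the distinction described above (not part of
this commit): a negative byte offset fits a 16-bit signed immediate field
but not the 16-bit unsigned field DS instructions provide, so the new hook
has to reject such modes. The isInt16/isUInt16 helpers below stand in for
LLVM's isInt<16>/isUInt<16> from MathExtras.h.

#include <cstdint>
#include <cstdio>

static bool isInt16(int64_t X)  { return X >= INT16_MIN && X <= INT16_MAX; }
static bool isUInt16(int64_t X) { return X >= 0 && X <= UINT16_MAX; }

int main() {
  int64_t Offs = -4; // a negative LDS byte offset a pass might try to fold
  printf("fits signed 16-bit field:   %d\n", isInt16(Offs));  // 1: accepted
  printf("fits unsigned 16-bit field: %d\n", isUInt16(Offs)); // 0: rejected
  return 0;
}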

Added:
    llvm/trunk/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
Modified:
    llvm/trunk/lib/Target/R600/SIISelLowering.cpp
    llvm/trunk/lib/Target/R600/SIISelLowering.h

Modified: llvm/trunk/lib/Target/R600/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.cpp?rev=215732&r1=215731&r2=215732&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.cpp Fri Aug 15 12:17:07 2014
@@ -242,6 +242,49 @@ SITargetLowering::SITargetLowering(Targe
 // TargetLowering queries
 //===----------------------------------------------------------------------===//
 
+// FIXME: This really needs an address space argument. The immediate offset
+// size differs between the different memory instruction sets.
+
+// The single offset DS instructions have a 16-bit unsigned byte offset.
+//
+// MUBUF / MTBUF have a 12-bit unsigned byte offset, and additionally can do r +
+// r + i with addr64. 32-bit has more addressing mode options. Depending on the
+// resource constant, it can also do (i64 r0) + (i32 r1) * (i14 i).
+//
+// SMRD instructions have an 8-bit, dword offset.
+//
+bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                             Type *Ty) const {
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+
+  // Allow a 16-bit unsigned immediate field, since this is what DS instructions
+  // use.
+  if (!isUInt<16>(AM.BaseOffs))
+    return false;
+
+  // Only support r+r,
+  switch (AM.Scale) {
+  case 0:  // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
+      return false;
+    // Otherwise we have r+r or r+i.
+    break;
+  case 2:
+    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
+      return false;
+    // Allow 2*r as r+r.
+    break;
+  default: // Don't allow n * r
+    return false;
+  }
+
+  return true;
+}
+
 bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT  VT,
                                                       unsigned AddrSpace,
                                                       unsigned Align,

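For illustration only (not part of the patch), the decision logic of the new
hook can be restated as a self-contained program, so the combinations named
in the comments above (r+i, r+r, r+r+i, 2*r, n*r) can be checked directly.
The AddrMode fields mirror the ones the hook inspects; everything else here
is a stand-in.

#include <cstdint>
#include <cstdio>

// Mirrors the TargetLowering::AddrMode fields used by the hook.
struct AddrMode {
  bool HasBaseReg = false;
  int64_t BaseOffs = 0;
  int64_t Scale = 0;
};

static bool isUInt16(int64_t X) { return X >= 0 && X <= 0xFFFF; }

// Restatement of SITargetLowering::isLegalAddressingMode above,
// with the BaseGV check omitted.
static bool legalForDS(const AddrMode &AM) {
  if (!isUInt16(AM.BaseOffs))
    return false;
  switch (AM.Scale) {
  case 0:  return true;                             // "r+i" or just "i"
  case 1:  return !(AM.HasBaseReg && AM.BaseOffs);  // "r+r" ok, "r+r+i" not
  case 2:  return !AM.HasBaseReg && !AM.BaseOffs;   // "2*r", folded as "r+r"
  default: return false;                            // no "n*r"
  }
}

int main() {
  printf("%d\n", legalForDS({true, 64, 0})); // r + 64    -> 1
  printf("%d\n", legalForDS({true, -4, 0})); // r + (-4)  -> 0 (unsigned field)
  printf("%d\n", legalForDS({true, 0, 1}));  // r + r     -> 1
  printf("%d\n", legalForDS({true, 8, 1}));  // r + r + 8 -> 0
  return 0;
}
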
Modified: llvm/trunk/lib/Target/R600/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIISelLowering.h?rev=215732&r1=215731&r2=215732&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/R600/SIISelLowering.h Fri Aug 15 12:17:07 2014
@@ -59,6 +59,10 @@ class SITargetLowering : public AMDGPUTa
 
 public:
   SITargetLowering(TargetMachine &tm);
+
+  bool isLegalAddressingMode(const AddrMode &AM,
+                             Type *Ty) const override;
+
   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
                                       unsigned Align,
                                       bool *IsFast) const override;

Added: llvm/trunk/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll?rev=215732&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll (added)
+++ llvm/trunk/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll Fri Aug 15 12:17:07 2014
@@ -0,0 +1,60 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare void @llvm.AMDGPU.barrier.local() #1
+
+; Function Attrs: nounwind
+; SI-LABEL: @signed_ds_offset_addressing_loop
+; SI: BB0_1:
+; SI: V_ADD_I32_e32 [[VADDR:v[0-9]+]],
+; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x0
+; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x4
+; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x80
+; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x84
+; SI-DAG: DS_READ_B32 v{{[0-9]+}}, [[VADDR]], 0x100
+; SI: S_ENDPGM
+define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
+entry:
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #0
+  %mul = shl nsw i32 %x.i, 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
+  %offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
+  %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  tail call void @llvm.AMDGPU.barrier.local() #1
+  %arrayidx = getelementptr inbounds float addrspace(3)* %lptr, i32 %offset.02
+  %tmp = load float addrspace(3)* %arrayidx, align 4
+  %add1 = add nsw i32 %offset.02, 1
+  %arrayidx2 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add1
+  %tmp1 = load float addrspace(3)* %arrayidx2, align 4
+  %add3 = add nsw i32 %offset.02, 32
+  %arrayidx4 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add3
+  %tmp2 = load float addrspace(3)* %arrayidx4, align 4
+  %add5 = add nsw i32 %offset.02, 33
+  %arrayidx6 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add5
+  %tmp3 = load float addrspace(3)* %arrayidx6, align 4
+  %add7 = add nsw i32 %offset.02, 64
+  %arrayidx8 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add7
+  %tmp4 = load float addrspace(3)* %arrayidx8, align 4
+  %add9 = fadd float %tmp, %tmp1
+  %add10 = fadd float %add9, %tmp2
+  %add11 = fadd float %add10, %tmp3
+  %add12 = fadd float %add11, %tmp4
+  %add13 = fadd float %sum.03, %add12
+  %inc = add nsw i32 %k.01, 1
+  %add14 = add nsw i32 %offset.02, 97
+  %exitcond = icmp eq i32 %inc, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  %tmp5 = sext i32 %x.i to i64
+  %arrayidx15 = getelementptr inbounds float addrspace(1)* %out, i64 %tmp5
+  store float %add13, float addrspace(1)* %arrayidx15, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { noduplicate nounwind }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
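
For reference (not part of the test): with 4-byte floats, the element
offsets added to %offset.02 in the loop body map onto the byte offsets the
CHECK lines expect on the DS_READ_B32 operands. A quick way to confirm the
mapping:

#include <cstdio>

int main() {
  // Element offsets added in the loop body above.
  int ElemOffsets[] = {0, 1, 32, 33, 64};
  for (int E : ElemOffsets)
    printf("0x%x\n", E * 4); // prints 0x0, 0x4, 0x80, 0x84, 0x100
  return 0;
}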




