PATCH: Add getVectorIdxTy() function to TargetLowering

Fri Jul 19 23:40:48 PDT 2013

On Fri, Jul 19, 2013 at 01:14:30PM -0700, Tom Stellard wrote:
> Hi,
> 
> The attached patch adds the virtual function getVectorIdxTy() to the
> TargetLowering class.  There are two motivations for this new function,
> the first is to clean up some bad code that is generated when using
> dynamic indexing of vectors in the R600 target.  The second is to reduce
> the use of TargetLowering::getPointerTy() in the SelectionDAG code which
> will help anyone who is trying to improve support for targets whose
> pointer size differs across address spaces.
> 
> Please Review.
> 

I forgot to add the test case to source control before generating the
patch, so here is an updated version of patch #2 with a test case.

-Tom

-------------- next part --------------
>From 3faf808672cb1040cbc70a7d9418c0079e11e140 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Fri, 19 Jul 2013 11:07:35 -0700
Subject: [PATCH 2/2] R600: Implement TargetLowering::getVectorIdxTy() v2

We use MVT::i32 for the vector index type, because we use 32-bit
operations to caculate offsets when dynamically indexing vectors.

v2:
  - Add test case
---
 lib/Target/R600/AMDGPUISelLowering.cpp      |  9 ++++++
 lib/Target/R600/AMDGPUISelLowering.h        |  1 +
 lib/Target/R600/SIInstructions.td           |  8 ++---
 test/CodeGen/R600/indirect-addressing-si.ll | 48 +++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 4 deletions(-)
 create mode 100644 test/CodeGen/R600/indirect-addressing-si.ll

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 2a4e44f..0f4195f 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -110,6 +110,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   }
 }
 
+//===----------------------------------------------------------------------===//
+// Target Information
+//===----------------------------------------------------------------------===//
+
+MVT AMDGPUTargetLowering::getVectorIdxTy() const {
+  return MVT::i32;
+}
+
+
 //===---------------------------------------------------------------------===//
 // TargetLowering Callbacks
 //===---------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 7f4468c..cc22889 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -49,6 +49,7 @@ protected:
 public:
   AMDGPUTargetLowering(TargetMachine &TM);
 
+  virtual MVT getVectorIdxTy() const;
   virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 789a518..355dd67 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1782,25 +1782,25 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
 
   // 1. Extract with offset
   def : Pat<
-    (vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))),
+    (vector_extract vt:$vec, (add i32:$idx, imm:$off)),
     (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
   >;
 
   // 2. Extract without offset
   def : Pat<
-    (vector_extract vt:$vec, (i64 (zext i32:$idx))),
+    (vector_extract vt:$vec, i32:$idx),
     (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
   >;
 
   // 3. Insert with offset
   def : Pat<
-    (vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))),
+    (vector_insert vt:$vec, f32:$val, (add i32:$idx, imm:$off)),
     (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
   >;
 
   // 4. Insert without offset
   def : Pat<
-    (vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))),
+    (vector_insert vt:$vec, f32:$val, i32:$idx),
     (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
   >;
 }
diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll
new file mode 100644
index 0000000..ba5de22
--- /dev/null
+++ b/test/CodeGen/R600/indirect-addressing-si.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+
+; Tests for indirect addressing on SI, which is implemented using dynamic
+; indexing of vectors.
+
+; CHECK: extract_w_offset
+; CHECK: S_MOV_B32 M0
+; CHECK-NEXT: V_MOVRELS_B32_e32
+define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = add i32 %in, 1
+  %1 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %0
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK: extract_wo_offset
+; CHECK: S_MOV_B32 M0
+; CHECK-NEXT: V_MOVRELS_B32_e32
+define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK: insert_w_offset
+; CHECK: S_MOV_B32 M0
+; CHECK-NEXT: V_MOVRELD_B32_e32
+define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = add i32 %in, 1
+  %1 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %0
+  %2 = extractelement <4 x float> %1, i32 2
+  store float %2, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK: insert_wo_offset
+; CHECK: S_MOV_B32 M0
+; CHECK-NEXT: V_MOVRELD_B32_e32
+define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
+  %1 = extractelement <4 x float> %0, i32 2
+  store float %1, float addrspace(1)* %out
+  ret void
+}
-- 
1.7.11.4