[PATCH] R600/SI: Expand all vec8 operations

Fri Jan 31 10:18:27 PST 2014

On Fri, Jan 31, 2014 at 10:12:25AM -0800, Matt Arsenault wrote:
> On 01/31/2014 08:53 AM, Tom Stellard wrote:
> >From: Tom Stellard <thomas.stellard at amd.com>
> >
> >---
> >  lib/Target/R600/AMDGPUISelLowering.cpp |  8 +++++++-
> >  lib/Target/R600/SIISelLowering.cpp     | 28 +++++++++++++++++++++++++
> >  lib/Target/R600/SIInstructions.td      |  2 ++
> >  test/CodeGen/R600/fadd.ll              | 37 ++++++++++++++++++++++++++--------
> >  4 files changed, 66 insertions(+), 9 deletions(-)
> >
> >diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> >index b155267..53c2a88 100644
> >--- a/lib/Target/R600/AMDGPUISelLowering.cpp
> >+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> >@@ -120,8 +120,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> >    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
> >    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
> >-  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
> >+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
> >+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
> >    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
> >+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
> >+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
> >+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
> >+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
> >+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
> >    setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
> >    setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
> >diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> >index 36dd3cf..05e8661 100644
> >--- a/lib/Target/R600/SIISelLowering.cpp
> >+++ b/lib/Target/R600/SIISelLowering.cpp
> >@@ -97,6 +97,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
> >    setOperationAction(ISD::LOAD, MVT::i64, Custom);
> >    setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
> >    setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
> >+  setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
> >    setOperationAction(ISD::STORE, MVT::i32, Custom);
> >    setOperationAction(ISD::STORE, MVT::i64, Custom);
> >@@ -146,6 +147,33 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
> >    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> >    setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
> >+  // We only support LOAD/STORE and vector manipulation ops for vectors
> >+  // with > 4 elements.
> >+  MVT VecTypes[] = {
> >+    MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
> >+  };
> >+
> >+  const size_t NumVecTypes = array_lengthof(VecTypes);
> >+  for (unsigned Type = 0; Type < NumVecTypes; ++Type) {
> >+    for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
> >+      switch(Op) {
> >+      case ISD::LOAD:
> >+      case ISD::STORE:
> >+      case ISD::BUILD_VECTOR:
> >+      case ISD::BITCAST:
> >+      case ISD::EXTRACT_VECTOR_ELT:
> >+      case ISD::INSERT_VECTOR_ELT:
> >+      case ISD::CONCAT_VECTORS:
> >+      case ISD::INSERT_SUBVECTOR:
> >+      case ISD::EXTRACT_SUBVECTOR:
> >+        break;
> >+      default:
> >+        setOperationAction(Op, VecTypes[Type], Expand);
> >+        break;
> >+      }
> >+    }
> >+  }
> >+
> >    setTargetDAGCombine(ISD::SELECT_CC);
> >    setTargetDAGCombine(ISD::SETCC);
> >diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> >index 912b59a..cdd34e2 100644
> >--- a/lib/Target/R600/SIInstructions.td
> >+++ b/lib/Target/R600/SIInstructions.td
> >@@ -1668,6 +1668,8 @@ def : BitConvert <v4i32, v4f32, VReg_128>;
> >  def : BitConvert <v4i32, i128,  VReg_128>;
> >  def : BitConvert <i128, v4i32,  VReg_128>;
> >+def : BitConvert <v8f32, v8i32, SReg_256>;
> >+def : BitConvert <v8i32, v8f32, SReg_256>;
> >  def : BitConvert <v8i32, v32i8, SReg_256>;
> >  def : BitConvert <v32i8, v8i32, SReg_256>;
> >  def : BitConvert <v8i32, v32i8, VReg_256>;
> >diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
> >index f467bb7..5d2b806 100644
> >--- a/test/CodeGen/R600/fadd.ll
> >+++ b/test/CodeGen/R600/fadd.ll
> >@@ -1,9 +1,8 @@
> >-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
> >-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
> >+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
> >+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
> >-; R600-CHECK: @fadd_f32
> >+; FUNC-LABEL: @fadd_f32
> >  ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
> >-; SI-CHECK: @fadd_f32
> >  ; SI-CHECK: V_ADD_F32
> >  define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
> >  entry:
> >@@ -12,10 +11,9 @@ entry:
> >     ret void
> >  }
> >-; R600-CHECK: @fadd_v2f32
> >+; FUNC-LABEL: @fadd_v2f32
> >  ; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
> >  ; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
> >-; SI-CHECK: @fadd_v2f32
> >  ; SI-CHECK: V_ADD_F32
> >  ; SI-CHECK: V_ADD_F32
> >  define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
> >@@ -25,12 +23,11 @@ entry:
> >    ret void
> >  }
> >-; R600-CHECK: @fadd_v4f32
> >+; FUNC-LABEL: @fadd_v4f32
> >  ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> >  ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> >  ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> >  ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> >-; SI-CHECK: @fadd_v4f32
> >  ; SI-CHECK: V_ADD_F32
> >  ; SI-CHECK: V_ADD_F32
> >  ; SI-CHECK: V_ADD_F32
> >@@ -43,3 +40,27 @@ define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
> >    store <4 x float> %result, <4 x float> addrspace(1)* %out
> >    ret void
> >  }
> >+
> >+; FUNC-LABEL: @fadd_v8f32
> >+; R600-CHECK: ADD
> >+; R600-CHECK: ADD
> >+; R600-CHECK: ADD
> >+; R600-CHECK: ADD
> >+; R600-CHECK: ADD
> >+; R600-CHECK: ADD
> >+; R600-CHECK: ADD
> >+; R600-CHECK: ADD
> >+; SI-CHECK: V_ADD_F32
> >+; SI-CHECK: V_ADD_F32
> >+; SI-CHECK: V_ADD_F32
> >+; SI-CHECK: V_ADD_F32
> >+; SI-CHECK: V_ADD_F32
> >+; SI-CHECK: V_ADD_F32
> >+; SI-CHECK: V_ADD_F32
> >+; SI-CHECK: V_ADD_F32
> >+define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) {
> >+entry:
> >+  %0 = fadd <8 x float> %a, %b
> >+  store <8 x float> %0, <8 x float> addrspace(1)* %out
> >+  ret void
> >+}
> Can you add a test with an i32 vector? Also, what about f64 and i64 vectors?
>

I can add an i32 vector test, but this patch only deals with i32 and f32 vectors,
so I'm not sure that f64 and i64 tests are relevant.  Maybe I should clarify that
in the commit message.

-Tom