[Mesa-dev] [PATCH] R600: Expand VSELECT for all types

Tue Jul 16 18:39:20 PDT 2013

Hi,

The three attached patches, together with this one, should fix VSELECT on SI
as well.

-Tom

On Tue, Jul 16, 2013 at 05:12:40PM -0500, Aaron Watry wrote:
> Looks good to me.
> 
> I've tested on Cedar (HD5400) with no OpenCL regressions, but cannot
> test on SI because SETCC still causes issues (see
> https://bugs.freedesktop.org/show_bug.cgi?id=66175).  Once SETCC is
> fixed for SI, we should probably add SI-CHECK lines to vselect.ll.
> 
> --Aaron
> 
> On Tue, Jul 16, 2013 at 2:15 PM, Tom Stellard <tom at stellard.net> wrote:
> > From: Tom Stellard <thomas.stellard at amd.com>
> >
> > ---
> >  lib/Target/R600/AMDGPUISelLowering.cpp |  3 +++
> >  lib/Target/R600/R600ISelLowering.cpp   |  3 ---
> >  test/CodeGen/R600/vselect.ll           | 30 ++++++++++++++++++++++++++++++
> >  3 files changed, 33 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> > index 9891ad3..e93ddc4 100644
> > --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> > @@ -77,6 +77,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> >    setOperationAction(ISD::UDIV, MVT::i32, Expand);
> >    setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
> >    setOperationAction(ISD::UREM, MVT::i32, Expand);
> > +  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
> > +  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
> >
> >    int types[] = {
> >      (int)MVT::v2i32,
> > @@ -97,6 +99,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> >      setOperationAction(ISD::SUB,  VT, Expand);
> >      setOperationAction(ISD::UDIV, VT, Expand);
> >      setOperationAction(ISD::UREM, VT, Expand);
> > +    setOperationAction(ISD::VSELECT, VT, Expand);
> >      setOperationAction(ISD::XOR,  VT, Expand);
> >    }
> >  }
> > diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> > index 7aef08a..1067b38 100644
> > --- a/lib/Target/R600/R600ISelLowering.cpp
> > +++ b/lib/Target/R600/R600ISelLowering.cpp
> > @@ -67,9 +67,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
> >    setOperationAction(ISD::SELECT, MVT::i32, Custom);
> >    setOperationAction(ISD::SELECT, MVT::f32, Custom);
> >
> > -  setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
> > -  setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
> > -
> >    // Legalize loads and stores to the private address space.
> >    setOperationAction(ISD::LOAD, MVT::i32, Custom);
> >    setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
> > diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
> > index 3f08cec..79d896b 100644
> > --- a/test/CodeGen/R600/vselect.ll
> > +++ b/test/CodeGen/R600/vselect.ll
> > @@ -14,6 +14,20 @@ entry:
> >    ret void
> >  }
> >
> > +;EG-CHECK: @test_select_v2f32
> > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> > +
> > +define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
> > +entry:
> > +  %0 = load <2 x float> addrspace(1)* %in0
> > +  %1 = load <2 x float> addrspace(1)* %in1
> > +  %cmp = fcmp one <2 x float> %0, %1
> > +  %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
> > +  store <2 x float> %result, <2 x float> addrspace(1)* %out
> > +  ret void
> > +}
> > +
> >  ;EG-CHECK: @test_select_v4i32
> >  ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> >  ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> > @@ -29,3 +43,19 @@ entry:
> >    store <4 x i32> %result, <4 x i32> addrspace(1)* %out
> >    ret void
> >  }
> > +
> > +;EG-CHECK: @test_select_v4f32
> > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> > +;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
> > +
> > +define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
> > +entry:
> > +  %0 = load <4 x float> addrspace(1)* %in0
> > +  %1 = load <4 x float> addrspace(1)* %in1
> > +  %cmp = fcmp one <4 x float> %0, %1
> > +  %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
> > +  store <4 x float> %result, <4 x float> addrspace(1)* %out
> > +  ret void
> > +}
> > --
> > 1.7.11.4
> >
> > _______________________________________________
> > mesa-dev mailing list
> > mesa-dev at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
From 762ec2680973282127737a8b5797edf9ff2ad87d Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Tue, 16 Jul 2013 18:13:03 -0700
Subject: [PATCH 1/3] R600/SI: Add support for v2f32 stores

---
 lib/Target/R600/AMDGPUISelLowering.cpp |  3 +++
 lib/Target/R600/SIInstructions.td      |  3 +++
 test/CodeGen/R600/store.ll             | 18 ++++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index e93ddc4..04d379e 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -57,6 +57,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::STORE, MVT::f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
 
+  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
+
   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
 
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index ffa45c5..a74efcc 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1482,6 +1482,9 @@ def : BitConvert <i64, f64, VReg_64>;
 
 def : BitConvert <f64, i64, VReg_64>;
 
+def : BitConvert <v2f32, v2i32, VReg_64>;
+def : BitConvert <v2i32, v2f32, VReg_64>;
+
 /********** =================== **********/
 /********** Src & Dst modifiers **********/
 /********** =================== **********/
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index f8c6f84..d233c73 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -15,6 +15,24 @@ define void @store_f32(float addrspace(1)* %out, float %in) {
   ret void
 }
 
+; vec2 floating-point stores
+; EG-CHECK: @store_v2f32
+; EG-CHECK: RAT_WRITE_CACHELESS_32_eg
+; EG-CHECK-NEXT: RAT_WRITE_CACHELESS_32_eg
+; CM-CHECK: @store_v2f32
+; CM-CHECK: EXPORT_RAT_INST_STORE_DWORD
+; CM-CHECK-NEXT: EXPORT_RAT_INST_STORE_DWORD
+; SI-CHECK: @store_v2f32
+; SI-CHECK: BUFFER_STORE_DWORDX2
+
+define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
+entry:
+  %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
+  %1 = insertelement <2 x float> %0, float %b, i32 1 ; lane 1, so %a in lane 0 is not overwritten
+  store <2 x float> %1, <2 x float> addrspace(1)* %out
+  ret void
+}
+
 ; The stores in this function are combined by the optimizer to create a
 ; 64-bit store with 32-bit alignment.  This is legal for SI and the legalizer
 ; should not try to split the 64-bit store back into 2 32-bit stores.
-- 
1.7.11.4

-------------- next part --------------
From b75ca32d36800c3bf99928a98847068283f97a3d Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Tue, 16 Jul 2013 18:27:21 -0700
Subject: [PATCH 2/3] R600/SI: Add support for v2f32 loads

---
 lib/Target/R600/AMDGPUISelLowering.cpp |  3 +++
 lib/Target/R600/SIInstructions.td      |  1 +
 lib/Target/R600/SIRegisterInfo.td      |  2 +-
 test/CodeGen/R600/load.ll              | 14 ++++++++++++++
 4 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 04d379e..9250f0a 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -69,6 +69,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::LOAD, MVT::f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
 
+  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
+
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
 
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index a74efcc..48add71 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1705,6 +1705,7 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
 defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
 defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
 defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
+defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
 defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
 defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
 
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 244d4c0..292b9d2 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -153,7 +153,7 @@ def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
   (add SGPR_32, M0Reg)
 >;
 
-def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
   (add SGPR_64, VCCReg, EXECReg)
 >;
 
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index d1ebaa3..60f7f86 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -41,6 +41,20 @@ entry:
   ret void
 }
 
+; load a v2f32 value from the global address space
+; R600-CHECK: @load_v2f32
+; R600-CHECK: VTX_READ_32
+; R600-CHECK: VTX_READ_32
+
+; SI-CHECK: @load_v2f32
+; SI-CHECK: BUFFER_LOAD_DWORDX2
+define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
+entry:
+  %0 = load <2 x float> addrspace(1)* %in
+  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  ret void
+}
+
 ; Load an i32 value from the constant address space.
 ; R600-CHECK: @load_const_addrspace_i32
 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-- 
1.7.11.4

-------------- next part --------------
From 31b245e0948ecdb0c2b7a2efa54f4683aba7653e Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Tue, 16 Jul 2013 18:32:24 -0700
Subject: [PATCH 3/3] R600/SI: Fix crash with VSELECT

https://bugs.freedesktop.org/show_bug.cgi?id=66175
---
 lib/Target/R600/SIISelLowering.cpp | 11 ++++++++++-
 lib/Target/R600/SIInstructions.td  |  3 +++
 test/CodeGen/R600/vselect.ll       | 15 +++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 336bfbf..520b0e4 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -34,6 +34,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
   addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
 
+  addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass);
+  addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass);
+
   addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
   addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
   addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
@@ -72,6 +75,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 
   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
 
+  setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
+  setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
+
   setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -316,7 +322,10 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
 }
 
 EVT SITargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
-  return MVT::i1;
+  if (!VT.isVector()) {
+    return MVT::i1;
+  }
+  return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
 }
 
 MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 48add71..68cf692 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1485,6 +1485,9 @@ def : BitConvert <f64, i64, VReg_64>;
 def : BitConvert <v2f32, v2i32, VReg_64>;
 def : BitConvert <v2i32, v2f32, VReg_64>;
 
+def : BitConvert <v4f32, v4i32, VReg_128>;
+def : BitConvert <v4i32, v4f32, VReg_128>;
+
 /********** =================== **********/
 /********** Src & Dst modifiers **********/
 /********** =================== **********/
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
index 79d896b..72a9084 100644
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -1,9 +1,14 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s
 
 ;EG-CHECK: @test_select_v2i32
 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
+;SI-CHECK: @test_select_v2i32
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+
 define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
 entry:
   %0 = load <2 x i32> addrspace(1)* %in0
@@ -18,6 +23,10 @@ entry:
 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
+;SI-CHECK: @test_select_v2f32
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+
 define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
 entry:
   %0 = load <2 x float> addrspace(1)* %in0
@@ -34,6 +43,12 @@ entry:
 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
+;SI-CHECK: @test_select_v4i32
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: V_CNDMASK_B32_e64
+
 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
 entry:
   %0 = load <4 x i32> addrspace(1)* %in0
-- 
1.7.11.4