[llvm-branch-commits] [llvm-branch] r235317 - Merging r227990:

Tom Stellard thomas.stellard at amd.com
Mon Apr 20 11:06:11 PDT 2015


Author: tstellar
Date: Mon Apr 20 13:06:11 2015
New Revision: 235317

URL: http://llvm.org/viewvc/llvm-project?rev=235317&view=rev
Log:
Merging r227990:

------------------------------------------------------------------------
r227990 | marek.olsak | 2015-02-03 12:38:12 -0500 (Tue, 03 Feb 2015) | 14 lines

R600/SI: Don't generate non-existent LSHL, LSHR, ASHR B32 variants on VI

This can happen when a REV instruction is commuted.

The trick is not to define the _vi versions of instructions, which has these
consequences:
- code generation will always fail if a pseudo cannot be lowered
  (very useful to catch bugs where an unsupported instruction somehow makes
   it to the printer)
- ability to query if a pseudo can be lowered, which is done in commuteOpcode
  to prevent REV from commuting to non-REV on VI

Tested-by: Michel Dänzer <michel.daenzer at amd.com>

------------------------------------------------------------------------

Modified:
    llvm/branches/release_36/lib/Target/R600/SIInstrInfo.cpp
    llvm/branches/release_36/lib/Target/R600/SIInstrInfo.td
    llvm/branches/release_36/lib/Target/R600/SIInstructions.td
    llvm/branches/release_36/test/CodeGen/R600/shl.ll
    llvm/branches/release_36/test/CodeGen/R600/sra.ll

Modified: llvm/branches/release_36/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIInstrInfo.cpp?rev=235317&r1=235316&r2=235317&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/SIInstrInfo.cpp Mon Apr 20 13:06:11 2015
@@ -408,11 +408,15 @@ unsigned SIInstrInfo::commuteOpcode(unsi
   int NewOpc;
 
   // Try to map original to commuted opcode
-  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
+  NewOpc = AMDGPU::getCommuteRev(Opcode);
+  // Check if the commuted (REV) opcode exists on the target.
+  if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
     return NewOpc;
 
   // Try to map commuted to original opcode
-  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
+  NewOpc = AMDGPU::getCommuteOrig(Opcode);
+  // Check if the original (non-REV) opcode exists on the target.
+  if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
     return NewOpc;
 
   return Opcode;

Modified: llvm/branches/release_36/lib/Target/R600/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIInstrInfo.td?rev=235317&r1=235316&r2=235317&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIInstrInfo.td (original)
+++ llvm/branches/release_36/lib/Target/R600/SIInstrInfo.td Mon Apr 20 13:06:11 2015
@@ -945,15 +945,26 @@ multiclass VOP3_2_m <vop op, dag outs, d
   def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
            VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
 
-  def _si : VOP3_Real_si <op.SI3,
-              outs, ins, asm, opName>,
+  def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
             VOP3DisableFields<1, 0, HasMods>;
 
-  def _vi : VOP3_Real_vi <op.VI3,
-              outs, ins, asm, opName>,
+  def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
             VOP3DisableFields<1, 0, HasMods>;
 }
 
+multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
+                     list<dag> pattern, string opName, string revOp,
+                     bit HasMods = 1, bit UseFullOp = 0> {
+
+  def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+  def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
+            VOP3DisableFields<1, 0, HasMods>;
+
+  // No VI instruction. This class is for SI only.
+}
+
 multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
                       list<dag> pattern, string opName, string revOp,
                       bit HasMods = 1, bit UseFullOp = 0> {
@@ -1073,6 +1084,21 @@ multiclass VOP2Inst <vop2 op, string opN
   revOp, P.HasModifiers
 >;
 
+multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
+                       SDPatternOperator node = null_frag,
+                       string revOp = opName> {
+  defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
+
+  defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
+    !if(P.HasModifiers,
+        [(set P.DstVT:$dst,
+             (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+                                        i1:$clamp, i32:$omod)),
+                   (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+    opName, revOp, P.HasModifiers>;
+}
+
 multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
                          dag ins32, string asm32, list<dag> pat32,
                          dag ins64, string asm64, list<dag> pat64,

Modified: llvm/branches/release_36/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIInstructions.td?rev=235317&r1=235316&r2=235317&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIInstructions.td (original)
+++ llvm/branches/release_36/lib/Target/R600/SIInstructions.td Mon Apr 20 13:06:11 2015
@@ -1538,21 +1538,21 @@ defm V_WRITELANE_B32 : VOP2SI_3VI_m <
 // These instructions only exist on SI and CI
 let SubtargetPredicate = isSICI in {
 
-defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
+defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
   VOP_F32_F32_F32, AMDGPUfmin_legacy
 >;
-defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
+defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
   VOP_F32_F32_F32, AMDGPUfmax_legacy
 >;
 
 let isCommutable = 1 in {
-defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
-defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
+defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
+defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32",
   VOP_I32_I32_I32, sra
 >;
 
 let hasPostISelHook = 1 in {
-defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
+defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
 }
 
 } // End isCommutable = 1

Modified: llvm/branches/release_36/test/CodeGen/R600/shl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/shl.ll?rev=235317&r1=235316&r2=235317&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/shl.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/shl.ll Mon Apr 20 13:06:11 2015
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s
 
 ;EG-CHECK: {{^}}shl_v2i32:
 ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -10,6 +10,10 @@
 ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
+;VI-CHECK: {{^}}shl_v2i32:
+;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
 define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
   %a = load <2 x i32> addrspace(1) * %in
@@ -31,6 +35,12 @@ define void @shl_v2i32(<2 x i32> addrspa
 ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 ;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
+;VI-CHECK: {{^}}shl_v4i32:
+;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;VI-CHECK: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
 define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
@@ -55,6 +65,9 @@ define void @shl_v4i32(<4 x i32> addrspa
 ;SI-CHECK: {{^}}shl_i64:
 ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
+;VI-CHECK: {{^}}shl_i64:
+;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
 define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
   %a = load i64 addrspace(1) * %in
@@ -90,6 +103,10 @@ define void @shl_i64(i64 addrspace(1)* %
 ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
+;VI-CHECK: {{^}}shl_v2i64:
+;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
 define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
   %a = load <2 x i64> addrspace(1) * %in
@@ -147,6 +164,12 @@ define void @shl_v2i64(<2 x i64> addrspa
 ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 ;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
+;VI-CHECK: {{^}}shl_v4i64:
+;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;VI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
 define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
   %a = load <4 x i64> addrspace(1) * %in

Modified: llvm/branches/release_36/test/CodeGen/R600/sra.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/sra.ll?rev=235317&r1=235316&r2=235317&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/sra.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/sra.ll Mon Apr 20 13:06:11 2015
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI-CHECK %s
 
 ;EG-CHECK-LABEL: {{^}}ashr_v2i32:
 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -10,6 +10,10 @@
 ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
+;VI-CHECK-LABEL: {{^}}ashr_v2i32:
+;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
 define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
   %a = load <2 x i32> addrspace(1) * %in
@@ -31,6 +35,12 @@ define void @ashr_v2i32(<2 x i32> addrsp
 ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 ;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
+;VI-CHECK-LABEL: {{^}}ashr_v4i32:
+;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;VI-CHECK: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
 define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
@@ -45,6 +55,10 @@ define void @ashr_v4i32(<4 x i32> addrsp
 
 ;SI-CHECK-LABEL: {{^}}ashr_i64:
 ;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
+
+;VI-CHECK-LABEL: {{^}}ashr_i64:
+;VI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
+
 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = sext i32 %in to i64
@@ -69,6 +83,10 @@ entry:
 
 ;SI-CHECK-LABEL: {{^}}ashr_i64_2:
 ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+;VI-CHECK-LABEL: {{^}}ashr_i64_2:
+;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
 define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 entry:
   %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
@@ -109,6 +127,10 @@ entry:
 ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
+;VI-CHECK-LABEL: {{^}}ashr_v2i64:
+;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
 define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
   %a = load <2 x i64> addrspace(1) * %in
@@ -174,6 +196,12 @@ define void @ashr_v2i64(<2 x i64> addrsp
 ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 ;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
+;VI-CHECK-LABEL: {{^}}ashr_v4i64:
+;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;VI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
 define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
   %a = load <4 x i64> addrspace(1) * %in






More information about the llvm-branch-commits mailing list