[llvm-commits] [llvm] r127410 - in /llvm/trunk: lib/Target/PTX/PTXInstrInfo.td lib/Target/PTX/PTXSubtarget.h test/CodeGen/PTX/fdiv-sm10.ll test/CodeGen/PTX/fdiv-sm13.ll test/CodeGen/PTX/mad.ll

Thu Mar 10 08:57:18 PST 2011

Author: jholewinski
Date: Thu Mar 10 10:57:18 2011
New Revision: 127410

URL: http://llvm.org/viewvc/llvm-project?rev=127410&view=rev
Log:
PTX: Add preliminary support for floating-point divide and multiply-and-add

Added:
    llvm/trunk/test/CodeGen/PTX/fdiv-sm10.ll
    llvm/trunk/test/CodeGen/PTX/fdiv-sm13.ll
    llvm/trunk/test/CodeGen/PTX/mad.ll
Modified:
    llvm/trunk/lib/Target/PTX/PTXInstrInfo.td
    llvm/trunk/lib/Target/PTX/PTXSubtarget.h

Modified: llvm/trunk/lib/Target/PTX/PTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXInstrInfo.td?rev=127410&r1=127409&r2=127410&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PTX/PTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PTX/PTXInstrInfo.td Thu Mar 10 10:57:18 2011
@@ -21,9 +21,22 @@
 // Code Generation Predicates
 //===----------------------------------------------------------------------===//
 
+// Addressing
 def Use32BitAddresses : Predicate<"!getSubtarget().use64BitAddresses()">;
 def Use64BitAddresses : Predicate<"getSubtarget().use64BitAddresses()">;
 
+// Shader Model Support
+def SupportsSM13       : Predicate<"getSubtarget().supportsSM13()">;
+def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
+def SupportsSM20       : Predicate<"getSubtarget().supportsSM20()">;
+def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
+
+// PTX Version Support
+def SupportsPTX20       : Predicate<"getSubtarget().supportsPTX20()">;
+def DoesNotSupportPTX20 : Predicate<"!getSubtarget().supportsPTX20()">;
+def SupportsPTX21       : Predicate<"getSubtarget().supportsPTX21()">;
+def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+
 //===----------------------------------------------------------------------===//
 // Instruction Pattern Stuff
 //===----------------------------------------------------------------------===//
@@ -165,8 +178,8 @@
 // Instruction Class Templates
 //===----------------------------------------------------------------------===//
 
-// Three-operand floating-point instruction template
-multiclass FLOAT3<string opcstr, SDNode opnode> {
+//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
+multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
   def rr32 : InstPTX<(outs RRegf32:$d),
                      (ins RRegf32:$a, RRegf32:$b),
                      !strconcat(opcstr, ".f32\t$d, $a, $b"),
@@ -185,6 +198,34 @@
                      [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
 }
 
+//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
+multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+  def rrr32 : InstPTX<(outs RRegf32:$d),
+                      (ins RRegf32:$a, RRegf32:$b, RRegf32:$c),
+                      !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+                      [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
+                                                          RRegf32:$b),
+                                                 RRegf32:$c))]>;
+  def rri32 : InstPTX<(outs RRegf32:$d),
+                      (ins RRegf32:$a, RRegf32:$b, f32imm:$c),
+                      !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+                      [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
+                                                          RRegf32:$b),
+                                                 fpimm:$c))]>;
+  def rrr64 : InstPTX<(outs RRegf64:$d),
+                      (ins RRegf64:$a, RRegf64:$b, RRegf64:$c),
+                      !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+                      [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
+                                                          RRegf64:$b),
+                                                 RRegf64:$c))]>;
+  def rri64 : InstPTX<(outs RRegf64:$d),
+                      (ins RRegf64:$a, RRegf64:$b, f64imm:$c),
+                      !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+                      [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
+                                                          RRegf64:$b),
+                                                 fpimm:$c))]>;
+}
+
 multiclass INT3<string opcstr, SDNode opnode> {
   def rr16 : InstPTX<(outs RRegu16:$d),
                      (ins RRegu16:$a, RRegu16:$b),
@@ -304,9 +345,59 @@
 
 ///===- Floating-Point Arithmetic Instructions ----------------------------===//
 
-defm FADD : FLOAT3<"add", fadd>;
-defm FSUB : FLOAT3<"sub", fsub>;
-defm FMUL : FLOAT3<"mul", fmul>;
+// Standard Binary Operations
+defm FADD : PTX_FLOAT_3OP<"add", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
+
+// TODO: Allow user selection of rounding modes for fdiv.
+// For division, we need to handle f32 and f64 differently.
+// For f32, we just always use .approx since it is supported on all hardware
+// for PTX 1.4+, which is our minimum target.
+def FDIVrr32 : InstPTX<(outs RRegf32:$d),
+                       (ins RRegf32:$a, RRegf32:$b),
+                       "div.approx.f32\t$d, $a, $b",
+                       [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>;
+def FDIVri32 : InstPTX<(outs RRegf32:$d),
+                       (ins RRegf32:$a, f32imm:$b),
+                       "div.approx.f32\t$d, $a, $b",
+                       [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>;
+
+// For f64, we must specify a rounding mode for sm 1.3+ but *not* for sm 1.0.
+def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d),
+                           (ins RRegf64:$a, RRegf64:$b),
+                           "div.rn.f64\t$d, $a, $b",
+                           [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
+                   Requires<[SupportsSM13]>;
+def FDIVri64SM13 : InstPTX<(outs RRegf64:$d),
+                           (ins RRegf64:$a, f64imm:$b),
+                           "div.rn.f64\t$d, $a, $b",
+                           [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
+                   Requires<[SupportsSM13]>;
+def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d),
+                           (ins RRegf64:$a, RRegf64:$b),
+                           "div.f64\t$d, $a, $b",
+                           [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
+                   Requires<[DoesNotSupportSM13]>;
+def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
+                           (ins RRegf64:$a, f64imm:$b),
+                           "div.f64\t$d, $a, $b",
+                           [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
+                   Requires<[DoesNotSupportSM13]>;
+
+
+
+// Multi-operation hybrid instructions
+
+// The selection of mad/fma is tricky.  In some cases, they are the *same*
+// instruction, but in other cases we may prefer one or the other.  Also,
+// different PTX versions differ on whether rounding mode flags are required.
+// In the short term, mad is supported on all PTX versions and we always emit
+// the .rn rounding mode, regardless of shader model or PTX version.
+// TODO: Allow the rounding mode to be selectable through llc.
+defm FMAD : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>;
+
+
 
 ///===- Integer Arithmetic Instructions -----------------------------------===//
 

Modified: llvm/trunk/lib/Target/PTX/PTXSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXSubtarget.h?rev=127410&r1=127409&r2=127410&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXSubtarget.h (original)
+++ llvm/trunk/lib/Target/PTX/PTXSubtarget.h Thu Mar 10 10:57:18 2011
@@ -54,6 +54,14 @@
 
       bool use64BitAddresses() const { return Use64BitAddresses; }
 
+      bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
+
+      bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
+
+      bool supportsPTX20() const { return PTXVersion >= PTX_VERSION_2_0; }
+
+      bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
+
       std::string ParseSubtargetFeatures(const std::string &FS,
                                          const std::string &CPU);
   }; // class PTXSubtarget

Added: llvm/trunk/test/CodeGen/PTX/fdiv-sm10.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/fdiv-sm10.ll?rev=127410&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/fdiv-sm10.ll (added)
+++ llvm/trunk/test/CodeGen/PTX/fdiv-sm10.ll Thu Mar 10 10:57:18 2011
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx -mattr=+sm10 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y) {
+; CHECK: div.approx.f32 f0, f1, f2;
+; CHECK-NEXT: ret;
+	%a = fdiv float %x, %y
+	ret float %a
+}
+
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: div.f64 fd0, fd1, fd2;
+; CHECK-NEXT: ret;
+	%a = fdiv double %x, %y
+	ret double %a
+}

Added: llvm/trunk/test/CodeGen/PTX/fdiv-sm13.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/fdiv-sm13.ll?rev=127410&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/fdiv-sm13.ll (added)
+++ llvm/trunk/test/CodeGen/PTX/fdiv-sm13.ll Thu Mar 10 10:57:18 2011
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=ptx -mattr=+sm13 | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y) {
+; CHECK: div.approx.f32 f0, f1, f2;
+; CHECK-NEXT: ret;
+	%a = fdiv float %x, %y
+	ret float %a
+}
+
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: div.rn.f64 fd0, fd1, fd2;
+; CHECK-NEXT: ret;
+	%a = fdiv double %x, %y
+	ret double %a
+}

Added: llvm/trunk/test/CodeGen/PTX/mad.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/mad.ll?rev=127410&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/mad.ll (added)
+++ llvm/trunk/test/CodeGen/PTX/mad.ll Thu Mar 10 10:57:18 2011
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=ptx | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y, float %z) {
+; CHECK: mad.rn.f32 f0, f1, f2, f3;
+; CHECK-NEXT: ret;
+	%a = fmul float %x, %y
+  %b = fadd float %a, %z
+	ret float %b
+}
+
+define ptx_device double @t1_f64(double %x, double %y, double %z) {
+; CHECK: mad.rn.f64 fd0, fd1, fd2, fd3;
+; CHECK-NEXT: ret;
+	%a = fmul double %x, %y
+  %b = fadd double %a, %z
+	ret double %b
+}