[llvm-commits] [llvm] r126824 - in /llvm/trunk: lib/Target/PTX/ test/CodeGen/PTX/
Che-Liang Chiou
clchiou at gmail.com
Tue Mar 1 19:20:28 PST 2011
Author: clchiou
Date: Tue Mar 1 21:20:28 2011
New Revision: 126824
URL: http://llvm.org/viewvc/llvm-project?rev=126824&view=rev
Log:
Extend initial support for primitive types in PTX backend
- Allow i16, i32, i64, float, and double types, using the native .u16,
.u32, .u64, .f32, and .f64 PTX types.
- Allow loading/storing of all primitive types.
- Allow primitive types to be passed as parameters.
- Allow selection of PTX Version and Shader Model as sub-target attributes.
- Merge integer/floating-point test cases for load/store.
- Use .u32 instead of .s32 to conform to output from the NVIDIA nvcc compiler.
Patch by Justin Holewinski
Removed:
llvm/trunk/test/CodeGen/PTX/ld_float.ll
llvm/trunk/test/CodeGen/PTX/st_float.ll
Modified:
llvm/trunk/lib/Target/PTX/PTX.td
llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp
llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp
llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp
llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp
llvm/trunk/lib/Target/PTX/PTXInstrInfo.td
llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp
llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td
llvm/trunk/lib/Target/PTX/PTXSubtarget.cpp
llvm/trunk/lib/Target/PTX/PTXSubtarget.h
llvm/trunk/test/CodeGen/PTX/add.ll
llvm/trunk/test/CodeGen/PTX/ld.ll
llvm/trunk/test/CodeGen/PTX/mov.ll
llvm/trunk/test/CodeGen/PTX/mul.ll
llvm/trunk/test/CodeGen/PTX/options.ll
llvm/trunk/test/CodeGen/PTX/st.ll
llvm/trunk/test/CodeGen/PTX/sub.ll
Modified: llvm/trunk/lib/Target/PTX/PTX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTX.td?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTX.td (original)
+++ llvm/trunk/lib/Target/PTX/PTX.td Tue Mar 1 21:20:28 2011
@@ -19,8 +19,34 @@
// Subtarget Features.
//===----------------------------------------------------------------------===//
-def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true",
- "Enable sm_20 target architecture">;
+//===- Architectural Features ---------------------------------------------===//
+
+def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
+ "Do not demote .f64 to .f32">;
+
+//===- PTX Version --------------------------------------------------------===//
+
+def FeaturePTX14 : SubtargetFeature<"ptx14", "PTXVersion", "PTX_VERSION_1_4",
+ "Use PTX Language Version 1.4">;
+
+def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
+ "Use PTX Language Version 2.0",
+ [FeaturePTX14]>;
+
+def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
+ "Use PTX Language Version 2.1",
+ [FeaturePTX20]>;
+
+//===- PTX Shader Model ---------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
+ "Enable Shader Model 1.0 compliance">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
+ "Enable Shader Model 1.3 compliance",
+ [FeatureSM10, FeatureDouble]>;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
+ "Enable Shader Model 2.0 compliance",
+ [FeatureSM13]>;
//===----------------------------------------------------------------------===//
// PTX supported processors.
Modified: llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp Tue Mar 1 21:20:28 2011
@@ -24,6 +24,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
@@ -37,13 +38,6 @@
using namespace llvm;
-static cl::opt<std::string>
-OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4"));
-
-static cl::opt<std::string>
-OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"),
- cl::init("sm_10"));
-
namespace {
class PTXAsmPrinter : public AsmPrinter {
public:
@@ -82,11 +76,14 @@
static const char PARAM_PREFIX[] = "__param_";
static const char *getRegisterTypeName(unsigned RegNo) {
-#define TEST_REGCLS(cls, clsstr) \
+#define TEST_REGCLS(cls, clsstr) \
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
- TEST_REGCLS(RRegf32, f32);
- TEST_REGCLS(RRegs32, s32);
TEST_REGCLS(Preds, pred);
+ TEST_REGCLS(RRegu16, u16);
+ TEST_REGCLS(RRegu32, u32);
+ TEST_REGCLS(RRegu64, u64);
+ TEST_REGCLS(RRegf32, f32);
+ TEST_REGCLS(RRegf64, f64);
#undef TEST_REGCLS
llvm_unreachable("Not in any register class!");
@@ -121,7 +118,14 @@
switch (type->getTypeID()) {
default: llvm_unreachable("Unknown type");
case Type::FloatTyID: return ".f32";
- case Type::IntegerTyID: return ".s32"; // TODO: Handle 64-bit types.
+ case Type::DoubleTyID: return ".f64";
+ case Type::IntegerTyID:
+ switch (type->getPrimitiveSizeInBits()) {
+ default: llvm_unreachable("Unknown integer bit-width");
+ case 16: return ".u16";
+ case 32: return ".u32";
+ case 64: return ".u64";
+ }
case Type::ArrayTyID:
case Type::PointerTyID:
type = dyn_cast<const SequentialType>(type)->getElementType();
@@ -162,8 +166,11 @@
void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
{
- OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
- OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
+ OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+ (ST.supportsDouble() ? "" : ", map_f64_to_f32")));
OutStreamer.AddBlankLine();
// declare global variables
@@ -236,11 +243,24 @@
break;
case MachineOperand::MO_FPImmediate:
APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
+ bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
+ // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
+ if (isFloat) {
+ OS << "0F";
+ }
+ else {
+ OS << "0D";
+ }
+ // Emit the encoded floating-point value.
if (constFP.getZExtValue() > 0) {
- OS << "0F" << constFP.toString(16, false);
+ OS << constFP.toString(16, false);
}
else {
- OS << "0F00000000";
+ OS << "00000000";
+ // If we have a double-precision zero, pad to 8 bytes.
+ if (!isFloat) {
+ OS << "00000000";
+ }
}
break;
}
@@ -338,12 +358,18 @@
if (!MFI->argRegEmpty()) {
decl += " (";
if (isKernel) {
- for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
- if (i != 0)
+ unsigned cnt = 0;
+ //for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
+ for(PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) {
+ reg = *i;
+ assert(reg != PTX::NoRegister && "Not a valid register!");
+ if (i != b)
decl += ", ";
- decl += ".param .s32 "; // TODO: add types
+ decl += ".param .u32"; // TODO: Parse type from register map
+ decl += " ";
decl += PARAM_PREFIX;
- decl += utostr(i + 1);
+ decl += utostr(++cnt);
}
} else {
for (PTXMachineFunctionInfo::reg_iterator
Modified: llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXISelDAGToDAG.cpp Tue Mar 1 21:20:28 2011
@@ -15,6 +15,7 @@
#include "PTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -66,14 +67,34 @@
}
SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
- SDValue index = Node->getOperand(1);
- DebugLoc dl = Node->getDebugLoc();
+ SDValue index = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned opcode;
if (index.getOpcode() != ISD::TargetConstant)
llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
+ if (Node->getValueType(0) == MVT::i16) {
+ opcode = PTX::LDpiU16;
+ }
+ else if (Node->getValueType(0) == MVT::i32) {
+ opcode = PTX::LDpiU32;
+ }
+ else if (Node->getValueType(0) == MVT::i64) {
+ opcode = PTX::LDpiU64;
+ }
+ else if (Node->getValueType(0) == MVT::f32) {
+ opcode = PTX::LDpiF32;
+ }
+ else if (Node->getValueType(0) == MVT::f64) {
+ opcode = PTX::LDpiF64;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type for ld.param");
+ }
+
return PTXInstrInfo::
- GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index);
+ GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index);
}
// Match memory operand of the form [reg+reg]
Modified: llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp Tue Mar 1 21:20:28 2011
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,13 +28,17 @@
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
- addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
+ addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
-
+ addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
+
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
-
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
// Customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@@ -90,10 +95,13 @@
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
} argmap[] = {
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
- argmap_entry(MVT::i32, PTX::RRegs32RegisterClass),
- argmap_entry(MVT::f32, PTX::RRegf32RegisterClass)
+ argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
+ argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
+ argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
+ argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
+ argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
};
-} // end anonymous namespace
+} // end anonymous namespace
SDValue PTXTargetLowering::
LowerFormalArguments(SDValue Chain,
@@ -192,12 +200,21 @@
SDValue Flag;
unsigned reg;
- if (Outs[0].VT == MVT::i32) {
+ if (Outs[0].VT == MVT::i16) {
+ reg = PTX::RH0;
+ }
+ else if (Outs[0].VT == MVT::i32) {
reg = PTX::R0;
}
+ else if (Outs[0].VT == MVT::i64) {
+ reg = PTX::RD0;
+ }
else if (Outs[0].VT == MVT::f32) {
reg = PTX::F0;
}
+ else if (Outs[0].VT == MVT::f64) {
+ reg = PTX::FD0;
+ }
else {
assert(false && "Can return only basic types");
}
Modified: llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp Tue Mar 1 21:20:28 2011
@@ -27,9 +27,12 @@
const TargetRegisterClass *cls;
const int opcode;
} map[] = {
- { &PTX::RRegs32RegClass, PTX::MOVrr },
- { &PTX::RRegf32RegClass, PTX::MOVrr },
- { &PTX::PredsRegClass, PTX::MOVpp }
+ { &PTX::RRegu16RegClass, PTX::MOVU16rr },
+ { &PTX::RRegu32RegClass, PTX::MOVU32rr },
+ { &PTX::RRegu64RegClass, PTX::MOVU64rr },
+ { &PTX::RRegf32RegClass, PTX::MOVF32rr },
+ { &PTX::RRegf64RegClass, PTX::MOVF64rr },
+ { &PTX::PredsRegClass, PTX::MOVPREDrr }
};
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -76,8 +79,12 @@
switch (MI.getOpcode()) {
default:
return false;
- case PTX::MOVpp:
- case PTX::MOVrr:
+ case PTX::MOVU16rr:
+ case PTX::MOVU32rr:
+ case PTX::MOVU64rr:
+ case PTX::MOVF32rr:
+ case PTX::MOVF64rr:
+ case PTX::MOVPREDrr:
assert(MI.getNumOperands() >= 2 &&
MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
"Invalid register-register move instruction");
Modified: llvm/trunk/lib/Target/PTX/PTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXInstrInfo.td?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PTX/PTXInstrInfo.td Tue Mar 1 21:20:28 2011
@@ -114,7 +114,7 @@
// Address operands
def MEMri : Operand<i32> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RRegs32, i32imm);
+ let MIOperandInfo = (ops RRegu32, i32imm);
}
def MEMii : Operand<i32> {
let PrintMethod = "printMemOperand";
@@ -143,75 +143,115 @@
// Instruction Class Templates
//===----------------------------------------------------------------------===//
-// Three-operand f32 instruction template
+// Three-operand floating-point instruction template
multiclass FLOAT3<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, RRegf32:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
- def ri : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, f32imm:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
+ def rr32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, RRegf32:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
+ def ri32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, f32imm:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
+ def rr64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, RRegf64:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
+ def ri64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, f64imm:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
}
multiclass INT3<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, RRegs32:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
- def ri : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, i32imm:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+ def rr16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, RRegu16:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+ def ri16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, i16imm:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, RRegu32:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+ def ri32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, i32imm:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, RRegu64:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+ def ri64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, i64imm:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
}
// no %type directive, non-commutative
multiclass INT3ntnc<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, RRegs32:$b),
+ def rr : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, RRegu32:$b),
!strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
- def ri : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, i32imm:$b),
+ [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+ def ri : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, i32imm:$b),
!strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
- def ir : InstPTX<(outs RRegs32:$d),
- (ins i32imm:$a, RRegs32:$b),
+ [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+ def ir : InstPTX<(outs RRegu32:$d),
+ (ins i32imm:$a, RRegu32:$b),
!strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
+ [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
}
-multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
+multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
def rr : InstPTX<(outs RC:$d),
(ins MEMri:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRrr:$a))]>;
def ri : InstPTX<(outs RC:$d),
(ins MEMri:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRri:$a))]>;
def ii : InstPTX<(outs RC:$d),
(ins MEMii:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRii:$a))]>;
}
-multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
+}
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
def rr : InstPTX<(outs),
(ins RC:$d, MEMri:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRrr:$a)]>;
def ri : InstPTX<(outs),
(ins RC:$d, MEMri:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRri:$a)]>;
def ii : InstPTX<(outs),
(ins RC:$d, MEMii:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRii:$a)]>;
}
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -236,60 +276,67 @@
///===- Data Movement and Conversion Instructions -------------------------===//
let neverHasSideEffects = 1 in {
- // rely on isMoveInstr to separate MOVpp, MOVrr, etc.
- def MOVpp
+ def MOVPREDrr
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
- def MOVrr
- : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
- def FMOVrr
+ def MOVU16rr
+ : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
+ def MOVU32rr
+ : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
+ def MOVU64rr
+ : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
+ def MOVF32rr
: InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
+ def MOVF64rr
+ : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def MOVpi
+ def MOVPREDri
: InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
[(set Preds:$d, imm:$a)]>;
- def MOVri
- : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
- [(set RRegs32:$d, imm:$a)]>;
- def FMOVri
+ def MOVU16ri
+ : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+ [(set RRegu16:$d, imm:$a)]>;
+ def MOVU32ri
+ : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RRegu32:$d, imm:$a)]>;
+ def MOVU164ri
+ : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RRegu64:$d, imm:$a)]>;
+ def MOVF32ri
: InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
[(set RRegf32:$d, fpimm:$a)]>;
-}
-
-// Integer loads
-defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
-defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
-defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
-defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>;
-defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
-
-def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
- "ld.param.%type\t$d, [$a]", []>;
-
-// Floating-point loads
-defm FLDg : PTX_LD<"ld.global", RRegf32, load_global>;
-defm FLDc : PTX_LD<"ld.const", RRegf32, load_constant>;
-defm FLDl : PTX_LD<"ld.local", RRegf32, load_local>;
-defm FLDp : PTX_LD<"ld.param", RRegf32, load_parameter>;
-defm FLDs : PTX_LD<"ld.shared", RRegf32, load_shared>;
-
-def FLDpi : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
- "ld.param.%type\t$d, [$a]", []>;
-
-// Integer stores
-defm STg : PTX_ST<"st.global", RRegs32, store_global>;
-defm STl : PTX_ST<"st.local", RRegs32, store_local>;
-// Store to parameter state space requires PTX 2.0 or higher?
-// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
-defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
-
-// Floating-point stores
-defm FSTg : PTX_ST<"st.global", RRegf32, store_global>;
-defm FSTl : PTX_ST<"st.local", RRegf32, store_local>;
-// Store to parameter state space requires PTX 2.0 or higher?
-// defm FSTp : PTX_ST<"st.param", RRegf32, store_parameter>;
-defm FSTs : PTX_ST<"st.shared", RRegf32, store_shared>;
+ def MOVF64ri
+ : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+ [(set RRegf64:$d, fpimm:$a)]>;
+}
+
+// Loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDl : PTX_LD_ALL<"ld.local", load_local>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// These are special instructions that are manually inserted for kernel parameters
+def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
+ "ld.param.u16\t$d, [$a]", []>;
+def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
+ "ld.param.u32\t$d, [$a]", []>;
+def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
+ "ld.param.u64\t$d, [$a]", []>;
+def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
+ "ld.param.f32\t$d, [$a]", []>;
+def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
+ "ld.param.f64\t$d, [$a]", []>;
+
+// Stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STl : PTX_ST_ALL<"st.local", store_local>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
+// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
+// TODO: Do something with st.param if/when it is needed.
///===- Control Flow Instructions -----------------------------------------===//
Modified: llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXMFInfoExtract.cpp Tue Mar 1 21:20:28 2011
@@ -79,12 +79,12 @@
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
i = MFI->argRegBegin(), e = MFI->argRegEnd();
- i != e; ++i)
+ i != e; ++i)
dbgs() << "Arg Reg: " << *i << "\n";);
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
- i != e; ++i)
+ i != e; ++i)
dbgs() << "Local Var Reg: " << *i << "\n";);
return false;
Modified: llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td Tue Mar 1 21:20:28 2011
@@ -19,6 +19,8 @@
// Registers
//===----------------------------------------------------------------------===//
+///===- Predicate Registers -----------------------------------------------===//
+
def P0 : PTXReg<"p0">;
def P1 : PTXReg<"p1">;
def P2 : PTXReg<"p2">;
@@ -52,6 +54,43 @@
def P30 : PTXReg<"p30">;
def P31 : PTXReg<"p31">;
+///===- 16-bit Integer Registers ------------------------------------------===//
+
+def RH0 : PTXReg<"rh0">;
+def RH1 : PTXReg<"rh1">;
+def RH2 : PTXReg<"rh2">;
+def RH3 : PTXReg<"rh3">;
+def RH4 : PTXReg<"rh4">;
+def RH5 : PTXReg<"rh5">;
+def RH6 : PTXReg<"rh6">;
+def RH7 : PTXReg<"rh7">;
+def RH8 : PTXReg<"rh8">;
+def RH9 : PTXReg<"rh9">;
+def RH10 : PTXReg<"rh10">;
+def RH11 : PTXReg<"rh11">;
+def RH12 : PTXReg<"rh12">;
+def RH13 : PTXReg<"rh13">;
+def RH14 : PTXReg<"rh14">;
+def RH15 : PTXReg<"rh15">;
+def RH16 : PTXReg<"rh16">;
+def RH17 : PTXReg<"rh17">;
+def RH18 : PTXReg<"rh18">;
+def RH19 : PTXReg<"rh19">;
+def RH20 : PTXReg<"rh20">;
+def RH21 : PTXReg<"rh21">;
+def RH22 : PTXReg<"rh22">;
+def RH23 : PTXReg<"rh23">;
+def RH24 : PTXReg<"rh24">;
+def RH25 : PTXReg<"rh25">;
+def RH26 : PTXReg<"rh26">;
+def RH27 : PTXReg<"rh27">;
+def RH28 : PTXReg<"rh28">;
+def RH29 : PTXReg<"rh29">;
+def RH30 : PTXReg<"rh30">;
+def RH31 : PTXReg<"rh31">;
+
+///===- 32-bit Integer Registers ------------------------------------------===//
+
def R0 : PTXReg<"r0">;
def R1 : PTXReg<"r1">;
def R2 : PTXReg<"r2">;
@@ -85,6 +124,43 @@
def R30 : PTXReg<"r30">;
def R31 : PTXReg<"r31">;
+///===- 64-bit Integer Registers ------------------------------------------===//
+
+def RD0 : PTXReg<"rd0">;
+def RD1 : PTXReg<"rd1">;
+def RD2 : PTXReg<"rd2">;
+def RD3 : PTXReg<"rd3">;
+def RD4 : PTXReg<"rd4">;
+def RD5 : PTXReg<"rd5">;
+def RD6 : PTXReg<"rd6">;
+def RD7 : PTXReg<"rd7">;
+def RD8 : PTXReg<"rd8">;
+def RD9 : PTXReg<"rd9">;
+def RD10 : PTXReg<"rd10">;
+def RD11 : PTXReg<"rd11">;
+def RD12 : PTXReg<"rd12">;
+def RD13 : PTXReg<"rd13">;
+def RD14 : PTXReg<"rd14">;
+def RD15 : PTXReg<"rd15">;
+def RD16 : PTXReg<"rd16">;
+def RD17 : PTXReg<"rd17">;
+def RD18 : PTXReg<"rd18">;
+def RD19 : PTXReg<"rd19">;
+def RD20 : PTXReg<"rd20">;
+def RD21 : PTXReg<"rd21">;
+def RD22 : PTXReg<"rd22">;
+def RD23 : PTXReg<"rd23">;
+def RD24 : PTXReg<"rd24">;
+def RD25 : PTXReg<"rd25">;
+def RD26 : PTXReg<"rd26">;
+def RD27 : PTXReg<"rd27">;
+def RD28 : PTXReg<"rd28">;
+def RD29 : PTXReg<"rd29">;
+def RD30 : PTXReg<"rd30">;
+def RD31 : PTXReg<"rd31">;
+
+///===- 32-bit Floating-Point Registers -----------------------------------===//
+
def F0 : PTXReg<"f0">;
def F1 : PTXReg<"f1">;
def F2 : PTXReg<"f2">;
@@ -118,6 +194,41 @@
def F30 : PTXReg<"f30">;
def F31 : PTXReg<"f31">;
+///===- 64-bit Floating-Point Registers -----------------------------------===//
+
+def FD0 : PTXReg<"fd0">;
+def FD1 : PTXReg<"fd1">;
+def FD2 : PTXReg<"fd2">;
+def FD3 : PTXReg<"fd3">;
+def FD4 : PTXReg<"fd4">;
+def FD5 : PTXReg<"fd5">;
+def FD6 : PTXReg<"fd6">;
+def FD7 : PTXReg<"fd7">;
+def FD8 : PTXReg<"fd8">;
+def FD9 : PTXReg<"fd9">;
+def FD10 : PTXReg<"fd10">;
+def FD11 : PTXReg<"fd11">;
+def FD12 : PTXReg<"fd12">;
+def FD13 : PTXReg<"fd13">;
+def FD14 : PTXReg<"fd14">;
+def FD15 : PTXReg<"fd15">;
+def FD16 : PTXReg<"fd16">;
+def FD17 : PTXReg<"fd17">;
+def FD18 : PTXReg<"fd18">;
+def FD19 : PTXReg<"fd19">;
+def FD20 : PTXReg<"fd20">;
+def FD21 : PTXReg<"fd21">;
+def FD22 : PTXReg<"fd22">;
+def FD23 : PTXReg<"fd23">;
+def FD24 : PTXReg<"fd24">;
+def FD25 : PTXReg<"fd25">;
+def FD26 : PTXReg<"fd26">;
+def FD27 : PTXReg<"fd27">;
+def FD28 : PTXReg<"fd28">;
+def FD29 : PTXReg<"fd29">;
+def FD30 : PTXReg<"fd30">;
+def FD31 : PTXReg<"fd31">;
+
//===----------------------------------------------------------------------===//
// Register classes
@@ -129,14 +240,32 @@
P16, P17, P18, P19, P20, P21, P22, P23,
P24, P25, P26, P27, P28, P29, P30, P31]>;
-def RRegs32 : RegisterClass<"PTX", [i32], 32,
+def RRegu16 : RegisterClass<"PTX", [i16], 16,
+ [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
+ RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
+ RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
+ RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31]>;
+
+def RRegu32 : RegisterClass<"PTX", [i32], 32,
[R0, R1, R2, R3, R4, R5, R6, R7,
R8, R9, R10, R11, R12, R13, R14, R15,
R16, R17, R18, R19, R20, R21, R22, R23,
R24, R25, R26, R27, R28, R29, R30, R31]>;
+def RRegu64 : RegisterClass<"PTX", [i64], 64,
+ [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
+ RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
+ RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
+ RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31]>;
+
def RRegf32 : RegisterClass<"PTX", [f32], 32,
[F0, F1, F2, F3, F4, F5, F6, F7,
F8, F9, F10, F11, F12, F13, F14, F15,
F16, F17, F18, F19, F20, F21, F22, F23,
F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def RRegf64 : RegisterClass<"PTX", [f64], 64,
+ [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7,
+ FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15,
+ FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23,
+ FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31]>;
Modified: llvm/trunk/lib/Target/PTX/PTXSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXSubtarget.cpp?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXSubtarget.cpp Tue Mar 1 21:20:28 2011
@@ -12,12 +12,33 @@
//===----------------------------------------------------------------------===//
#include "PTXSubtarget.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
- std::string TARGET = "sm_20";
- // TODO: call ParseSubtargetFeatures(FS, TARGET);
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS)
+ : PTXShaderModel(PTX_SM_1_0),
+ PTXVersion(PTX_VERSION_1_4) {
+ std::string TARGET = "generic";
+ ParseSubtargetFeatures(FS, TARGET);
+}
+
+std::string PTXSubtarget::getTargetString() const {
+ switch(PTXShaderModel) {
+ default: llvm_unreachable("Unknown shader model");
+ case PTX_SM_1_0: return "sm_10";
+ case PTX_SM_1_3: return "sm_13";
+ case PTX_SM_2_0: return "sm_20";
+ }
+}
+
+std::string PTXSubtarget::getPTXVersionString() const {
+ switch(PTXVersion) {
+ default: llvm_unreachable("Unknown PTX version");
+ case PTX_VERSION_1_4: return "1.4";
+ case PTX_VERSION_2_0: return "2.0";
+ case PTX_VERSION_2_1: return "2.1";
+ }
}
#include "PTXGenSubtarget.inc"
Modified: llvm/trunk/lib/Target/PTX/PTXSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXSubtarget.h?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXSubtarget.h (original)
+++ llvm/trunk/lib/Target/PTX/PTXSubtarget.h Tue Mar 1 21:20:28 2011
@@ -19,11 +19,36 @@
namespace llvm {
class PTXSubtarget : public TargetSubtarget {
private:
- bool is_sm20;
+ enum PTXShaderModelEnum {
+ PTX_SM_1_0,
+ PTX_SM_1_3,
+ PTX_SM_2_0
+ };
+
+ enum PTXVersionEnum {
+ PTX_VERSION_1_4,
+ PTX_VERSION_2_0,
+ PTX_VERSION_2_1
+ };
+
+ /// Shader Model supported on the target GPU.
+ PTXShaderModelEnum PTXShaderModel;
+
+ /// PTX Language Version.
+ PTXVersionEnum PTXVersion;
+
+ /// True if the native .f64 type is supported on the hardware.
+ bool SupportsDouble;
public:
PTXSubtarget(const std::string &TT, const std::string &FS);
+ std::string getTargetString() const;
+
+ std::string getPTXVersionString() const;
+
+ bool supportsDouble() const { return SupportsDouble; }
+
std::string ParseSubtargetFeatures(const std::string &FS,
const std::string &CPU);
}; // class PTXSubtarget
Modified: llvm/trunk/test/CodeGen/PTX/add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/add.ll?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/add.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/add.ll Tue Mar 1 21:20:28 2011
@@ -1,29 +1,71 @@
; RUN: llc < %s -march=ptx | FileCheck %s
-define ptx_device i32 @t1(i32 %x, i32 %y) {
-; CHECK: add.s32 r0, r1, r2;
+define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
+; CHECK: add.u16 rh0, rh1, rh2;
+; CHECK-NEXT: ret;
+ %z = add i16 %x, %y
+ ret i16 %z
+}
+
+define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
+; CHECK: add.u32 r0, r1, r2;
+; CHECK-NEXT: ret;
%z = add i32 %x, %y
-; CHECK: ret;
ret i32 %z
}
-define ptx_device i32 @t2(i32 %x) {
-; CHECK: add.s32 r0, r1, 1;
- %z = add i32 %x, 1
-; CHECK: ret;
- ret i32 %z
+define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
+; CHECK: add.u64 rd0, rd1, rd2;
+; CHECK-NEXT: ret;
+ %z = add i64 %x, %y
+ ret i64 %z
}
-define ptx_device float @t3(float %x, float %y) {
+define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: add.f32 f0, f1, f2
; CHECK-NEXT: ret;
%z = fadd float %x, %y
ret float %z
}
-define ptx_device float @t4(float %x) {
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: add.f64 fd0, fd1, fd2
+; CHECK-NEXT: ret;
+ %z = fadd double %x, %y
+ ret double %z
+}
+
+define ptx_device i16 @t2_u16(i16 %x) {
+; CHECK: add.u16 rh0, rh1, 1;
+; CHECK-NEXT: ret;
+ %z = add i16 %x, 1
+ ret i16 %z
+}
+
+define ptx_device i32 @t2_u32(i32 %x) {
+; CHECK: add.u32 r0, r1, 1;
+; CHECK-NEXT: ret;
+ %z = add i32 %x, 1
+ ret i32 %z
+}
+
+define ptx_device i64 @t2_u64(i64 %x) {
+; CHECK: add.u64 rd0, rd1, 1;
+; CHECK-NEXT: ret;
+ %z = add i64 %x, 1
+ ret i64 %z
+}
+
+define ptx_device float @t2_f32(float %x) {
; CHECK: add.f32 f0, f1, 0F3F800000;
; CHECK-NEXT: ret;
%z = fadd float %x, 1.0
ret float %z
}
+
+define ptx_device double @t2_f64(double %x) {
+; CHECK: add.f64 fd0, fd1, 0D3FF0000000000000;
+; CHECK-NEXT: ret;
+ %z = fadd double %x, 1.0
+ ret double %z
+}
Modified: llvm/trunk/test/CodeGen/PTX/ld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/ld.ll?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/ld.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/ld.ll Tue Mar 1 21:20:28 2011
@@ -1,78 +1,422 @@
; RUN: llc < %s -march=ptx | FileCheck %s
-;CHECK: .extern .global .s32 array[];
-@array = external global [10 x i32]
+;CHECK: .extern .global .u16 array_i16[];
+@array_i16 = external global [10 x i16]
-;CHECK: .extern .const .s32 array_constant[];
-@array_constant = external addrspace(1) constant [10 x i32]
+;CHECK: .extern .const .u16 array_constant_i16[];
+@array_constant_i16 = external addrspace(1) constant [10 x i16]
-;CHECK: .extern .local .s32 array_local[];
-@array_local = external addrspace(2) global [10 x i32]
+;CHECK: .extern .local .u16 array_local_i16[];
+@array_local_i16 = external addrspace(2) global [10 x i16]
-;CHECK: .extern .shared .s32 array_shared[];
-@array_shared = external addrspace(4) global [10 x i32]
+;CHECK: .extern .shared .u16 array_shared_i16[];
+@array_shared_i16 = external addrspace(4) global [10 x i16]
-define ptx_device i32 @t1(i32* %p) {
+;CHECK: .extern .global .u32 array_i32[];
+@array_i32 = external global [10 x i32]
+
+;CHECK: .extern .const .u32 array_constant_i32[];
+@array_constant_i32 = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .u32 array_local_i32[];
+@array_local_i32 = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .u32 array_shared_i32[];
+@array_shared_i32 = external addrspace(4) global [10 x i32]
+
+;CHECK: .extern .global .u64 array_i64[];
+@array_i64 = external global [10 x i64]
+
+;CHECK: .extern .const .u64 array_constant_i64[];
+@array_constant_i64 = external addrspace(1) constant [10 x i64]
+
+;CHECK: .extern .local .u64 array_local_i64[];
+@array_local_i64 = external addrspace(2) global [10 x i64]
+
+;CHECK: .extern .shared .u64 array_shared_i64[];
+@array_shared_i64 = external addrspace(4) global [10 x i64]
+
+;CHECK: .extern .global .f32 array_float[];
+@array_float = external global [10 x float]
+
+;CHECK: .extern .const .f32 array_constant_float[];
+@array_constant_float = external addrspace(1) constant [10 x float]
+
+;CHECK: .extern .local .f32 array_local_float[];
+@array_local_float = external addrspace(2) global [10 x float]
+
+;CHECK: .extern .shared .f32 array_shared_float[];
+@array_shared_float = external addrspace(4) global [10 x float]
+
+;CHECK: .extern .global .f64 array_double[];
+@array_double = external global [10 x double]
+
+;CHECK: .extern .const .f64 array_constant_double[];
+@array_constant_double = external addrspace(1) constant [10 x double]
+
+;CHECK: .extern .local .f64 array_local_double[];
+@array_local_double = external addrspace(2) global [10 x double]
+
+;CHECK: .extern .shared .f64 array_shared_double[];
+@array_shared_double = external addrspace(4) global [10 x double]
+
+
+define ptx_device i16 @t1_u16(i16* %p) {
entry:
-;CHECK: ld.global.s32 r0, [r1];
+;CHECK: ld.global.u16 rh0, [r1];
+;CHECK-NEXT: ret;
+ %x = load i16* %p
+ ret i16 %x
+}
+
+define ptx_device i32 @t1_u32(i32* %p) {
+entry:
+;CHECK: ld.global.u32 r0, [r1];
+;CHECK-NEXT: ret;
%x = load i32* %p
ret i32 %x
}
-define ptx_device i32 @t2(i32* %p) {
+define ptx_device i64 @t1_u64(i64* %p) {
+entry:
+;CHECK: ld.global.u64 rd0, [r1];
+;CHECK-NEXT: ret;
+ %x = load i64* %p
+ ret i64 %x
+}
+
+define ptx_device float @t1_f32(float* %p) {
+entry:
+;CHECK: ld.global.f32 f0, [r1];
+;CHECK-NEXT: ret;
+ %x = load float* %p
+ ret float %x
+}
+
+define ptx_device double @t1_f64(double* %p) {
+entry:
+;CHECK: ld.global.f64 fd0, [r1];
+;CHECK-NEXT: ret;
+ %x = load double* %p
+ ret double %x
+}
+
+define ptx_device i16 @t2_u16(i16* %p) {
entry:
-;CHECK: ld.global.s32 r0, [r1+4];
+;CHECK: ld.global.u16 rh0, [r1+2];
+;CHECK-NEXT: ret;
+ %i = getelementptr i16* %p, i32 1
+ %x = load i16* %i
+ ret i16 %x
+}
+
+define ptx_device i32 @t2_u32(i32* %p) {
+entry:
+;CHECK: ld.global.u32 r0, [r1+4];
+;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 1
%x = load i32* %i
ret i32 %x
}
-define ptx_device i32 @t3(i32* %p, i32 %q) {
+define ptx_device i64 @t2_u64(i64* %p) {
+entry:
+;CHECK: ld.global.u64 rd0, [r1+8];
+;CHECK-NEXT: ret;
+ %i = getelementptr i64* %p, i32 1
+ %x = load i64* %i
+ ret i64 %x
+}
+
+define ptx_device float @t2_f32(float* %p) {
+entry:
+;CHECK: ld.global.f32 f0, [r1+4];
+;CHECK-NEXT: ret;
+ %i = getelementptr float* %p, i32 1
+ %x = load float* %i
+ ret float %x
+}
+
+define ptx_device double @t2_f64(double* %p) {
+entry:
+;CHECK: ld.global.f64 fd0, [r1+8];
+;CHECK-NEXT: ret;
+ %i = getelementptr double* %p, i32 1
+ %x = load double* %i
+ ret double %x
+}
+
+define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 1;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.u16 rh0, [r0];
+ %i = getelementptr i16* %p, i32 %q
+ %x = load i16* %i
+ ret i16 %x
+}
+
+define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
entry:
;CHECK: shl.b32 r0, r2, 2;
-;CHECK: add.s32 r0, r1, r0;
-;CHECK: ld.global.s32 r0, [r0];
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.u32 r0, [r0];
%i = getelementptr i32* %p, i32 %q
%x = load i32* %i
ret i32 %x
}
-define ptx_device i32 @t4_global() {
+define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.u64 rd0, [r0];
+ %i = getelementptr i64* %p, i32 %q
+ %x = load i64* %i
+ ret i64 %x
+}
+
+define ptx_device float @t3_f32(float* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.f32 f0, [r0];
+ %i = getelementptr float* %p, i32 %q
+ %x = load float* %i
+ ret float %x
+}
+
+define ptx_device double @t3_f64(double* %p, i32 %q) {
+entry:
+;CHECK: shl.b32 r0, r2, 3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: ld.global.f64 fd0, [r0];
+ %i = getelementptr double* %p, i32 %q
+ %x = load double* %i
+ ret double %x
+}
+
+define ptx_device i16 @t4_global_u16() {
+entry:
+;CHECK: ld.global.u16 rh0, [array_i16];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
+ %x = load i16* %i
+ ret i16 %x
+}
+
+define ptx_device i32 @t4_global_u32() {
entry:
-;CHECK: ld.global.s32 r0, [array];
- %i = getelementptr [10 x i32]* @array, i32 0, i32 0
+;CHECK: ld.global.u32 r0, [array_i32];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
%x = load i32* %i
ret i32 %x
}
-define ptx_device i32 @t4_const() {
+define ptx_device i64 @t4_global_u64() {
entry:
-;CHECK: ld.const.s32 r0, [array_constant];
- %i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0
+;CHECK: ld.global.u64 rd0, [array_i64];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
+ %x = load i64* %i
+ ret i64 %x
+}
+
+define ptx_device float @t4_global_f32() {
+entry:
+;CHECK: ld.global.f32 f0, [array_float];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
+ %x = load float* %i
+ ret float %x
+}
+
+define ptx_device double @t4_global_f64() {
+entry:
+;CHECK: ld.global.f64 fd0, [array_double];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
+ %x = load double* %i
+ ret double %x
+}
+
+define ptx_device i16 @t4_const_u16() {
+entry:
+;CHECK: ld.const.u16 rh0, [array_constant_i16];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
+ %x = load i16 addrspace(1)* %i
+ ret i16 %x
+}
+
+define ptx_device i32 @t4_const_u32() {
+entry:
+;CHECK: ld.const.u32 r0, [array_constant_i32];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
%x = load i32 addrspace(1)* %i
ret i32 %x
}
-define ptx_device i32 @t4_local() {
+define ptx_device i64 @t4_const_u64() {
+entry:
+;CHECK: ld.const.u64 rd0, [array_constant_i64];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
+ %x = load i64 addrspace(1)* %i
+ ret i64 %x
+}
+
+define ptx_device float @t4_const_f32() {
entry:
-;CHECK: ld.local.s32 r0, [array_local];
- %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+;CHECK: ld.const.f32 f0, [array_constant_float];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
+ %x = load float addrspace(1)* %i
+ ret float %x
+}
+
+define ptx_device double @t4_const_f64() {
+entry:
+;CHECK: ld.const.f64 fd0, [array_constant_double];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
+ %x = load double addrspace(1)* %i
+ ret double %x
+}
+
+define ptx_device i16 @t4_local_u16() {
+entry:
+;CHECK: ld.local.u16 rh0, [array_local_i16];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
+ %x = load i16 addrspace(2)* %i
+ ret i16 %x
+}
+
+define ptx_device i32 @t4_local_u32() {
+entry:
+;CHECK: ld.local.u32 r0, [array_local_i32];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
%x = load i32 addrspace(2)* %i
ret i32 %x
}
-define ptx_device i32 @t4_shared() {
+define ptx_device i64 @t4_local_u64() {
entry:
-;CHECK: ld.shared.s32 r0, [array_shared];
- %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+;CHECK: ld.local.u64 rd0, [array_local_i64];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
+ %x = load i64 addrspace(2)* %i
+ ret i64 %x
+}
+
+define ptx_device float @t4_local_f32() {
+entry:
+;CHECK: ld.local.f32 f0, [array_local_float];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
+ %x = load float addrspace(2)* %i
+ ret float %x
+}
+
+define ptx_device double @t4_local_f64() {
+entry:
+;CHECK: ld.local.f64 fd0, [array_local_double];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
+ %x = load double addrspace(2)* %i
+ ret double %x
+}
+
+define ptx_device i16 @t4_shared_u16() {
+entry:
+;CHECK: ld.shared.u16 rh0, [array_shared_i16];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
+ %x = load i16 addrspace(4)* %i
+ ret i16 %x
+}
+
+define ptx_device i32 @t4_shared_u32() {
+entry:
+;CHECK: ld.shared.u32 r0, [array_shared_i32];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
%x = load i32 addrspace(4)* %i
ret i32 %x
}
-define ptx_device i32 @t5() {
+define ptx_device i64 @t4_shared_u64() {
+entry:
+;CHECK: ld.shared.u64 rd0, [array_shared_i64];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
+ %x = load i64 addrspace(4)* %i
+ ret i64 %x
+}
+
+define ptx_device float @t4_shared_f32() {
+entry:
+;CHECK: ld.shared.f32 f0, [array_shared_float];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
+ %x = load float addrspace(4)* %i
+ ret float %x
+}
+
+define ptx_device double @t4_shared_f64() {
+entry:
+;CHECK: ld.shared.f64 fd0, [array_shared_double];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
+ %x = load double addrspace(4)* %i
+ ret double %x
+}
+
+define ptx_device i16 @t5_u16() {
entry:
-;CHECK: ld.global.s32 r0, [array+4];
- %i = getelementptr [10 x i32]* @array, i32 0, i32 1
+;CHECK: ld.global.u16 rh0, [array_i16+2];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
+ %x = load i16* %i
+ ret i16 %x
+}
+
+define ptx_device i32 @t5_u32() {
+entry:
+;CHECK: ld.global.u32 r0, [array_i32+4];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
%x = load i32* %i
ret i32 %x
}
+
+define ptx_device i64 @t5_u64() {
+entry:
+;CHECK: ld.global.u64 rd0, [array_i64+8];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
+ %x = load i64* %i
+ ret i64 %x
+}
+
+define ptx_device float @t5_f32() {
+entry:
+;CHECK: ld.global.f32 f0, [array_float+4];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
+ %x = load float* %i
+ ret float %x
+}
+
+define ptx_device double @t5_f64() {
+entry:
+;CHECK: ld.global.f64 fd0, [array_double+8];
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
+ %x = load double* %i
+ ret double %x
+}
Removed: llvm/trunk/test/CodeGen/PTX/ld_float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/ld_float.ll?rev=126823&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/ld_float.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/ld_float.ll (removed)
@@ -1,86 +0,0 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
-
-;CHECK: .extern .global .f32 array[];
-@array = external global [10 x float]
-
-;CHECK: .extern .const .f32 array_constant[];
-@array_constant = external addrspace(1) constant [10 x float]
-
-;CHECK: .extern .local .f32 array_local[];
-@array_local = external addrspace(2) global [10 x float]
-
-;CHECK: .extern .shared .f32 array_shared[];
-@array_shared = external addrspace(4) global [10 x float]
-
-define ptx_device float @t1(float* %p) {
-entry:
-;CHECK: ld.global.f32 f0, [r1];
-;CHECK-NEXT: ret;
- %x = load float* %p
- ret float %x
-}
-
-define ptx_device float @t2(float* %p) {
-entry:
-;CHECK: ld.global.f32 f0, [r1+4];
-;CHECK-NEXT: ret;
- %i = getelementptr float* %p, i32 1
- %x = load float* %i
- ret float %x
-}
-
-define ptx_device float @t3(float* %p, i32 %q) {
-entry:
-;CHECK: shl.b32 r0, r2, 2;
-;CHECK-NEXT: add.s32 r0, r1, r0;
-;CHECK-NEXT: ld.global.f32 f0, [r0];
-;CHECK-NEXT: ret;
- %i = getelementptr float* %p, i32 %q
- %x = load float* %i
- ret float %x
-}
-
-define ptx_device float @t4_global() {
-entry:
-;CHECK: ld.global.f32 f0, [array];
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float]* @array, i32 0, i32 0
- %x = load float* %i
- ret float %x
-}
-
-define ptx_device float @t4_const() {
-entry:
-;CHECK: ld.const.f32 f0, [array_constant];
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float] addrspace(1)* @array_constant, i32 0, i32 0
- %x = load float addrspace(1)* %i
- ret float %x
-}
-
-define ptx_device float @t4_local() {
-entry:
-;CHECK: ld.local.f32 f0, [array_local];
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
- %x = load float addrspace(2)* %i
- ret float %x
-}
-
-define ptx_device float @t4_shared() {
-entry:
-;CHECK: ld.shared.f32 f0, [array_shared];
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
- %x = load float addrspace(4)* %i
- ret float %x
-}
-
-define ptx_device float @t5() {
-entry:
-;CHECK: ld.global.f32 f0, [array+4];
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float]* @array, i32 0, i32 1
- %x = load float* %i
- ret float %x
-}
Modified: llvm/trunk/test/CodeGen/PTX/mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/mov.ll?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/mov.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/mov.ll Tue Mar 1 21:20:28 2011
@@ -1,25 +1,62 @@
; RUN: llc < %s -march=ptx | FileCheck %s
-define ptx_device i32 @t1() {
-; CHECK: mov.s32 r0, 0;
+define ptx_device i16 @t1_u16() {
+; CHECK: mov.u16 rh0, 0;
+; CHECK: ret;
+ ret i16 0
+}
+
+define ptx_device i32 @t1_u32() {
+; CHECK: mov.u32 r0, 0;
; CHECK: ret;
ret i32 0
}
-define ptx_device i32 @t2(i32 %x) {
-; CHECK: mov.s32 r0, r1;
+define ptx_device i64 @t1_u64() {
+; CHECK: mov.u64 rd0, 0;
; CHECK: ret;
- ret i32 %x
+ ret i64 0
}
-define ptx_device float @t3() {
+define ptx_device float @t1_f32() {
; CHECK: mov.f32 f0, 0F00000000;
-; CHECK-NEXT: ret;
+; CHECK: ret;
ret float 0.0
}
-define ptx_device float @t4(float %x) {
+define ptx_device double @t1_f64() {
+; CHECK: mov.f64 fd0, 0D0000000000000000;
+; CHECK: ret;
+ ret double 0.0
+}
+
+define ptx_device i16 @t2_u16(i16 %x) {
+; CHECK: mov.u16 rh0, rh1;
+; CHECK: ret;
+ ret i16 %x
+}
+
+define ptx_device i32 @t2_u32(i32 %x) {
+; CHECK: mov.u32 r0, r1;
+; CHECK: ret;
+ ret i32 %x
+}
+
+define ptx_device i64 @t2_u64(i64 %x) {
+; CHECK: mov.u64 rd0, rd1;
+; CHECK: ret;
+ ret i64 %x
+}
+
+define ptx_device float @t3_f32(float %x) {
; CHECK: mov.f32 f0, f1;
; CHECK-NEXT: ret;
ret float %x
}
+
+define ptx_device double @t3_f64(double %x) {
+; CHECK: mov.f64 fd0, fd1;
+; CHECK-NEXT: ret;
+ ret double %x
+}
+
Modified: llvm/trunk/test/CodeGen/PTX/mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/mul.ll?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/mul.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/mul.ll Tue Mar 1 21:20:28 2011
@@ -10,16 +10,30 @@
; ret i32 %z
;}
-define ptx_device float @t3(float %x, float %y) {
+define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: mul.f32 f0, f1, f2
; CHECK-NEXT: ret;
%z = fmul float %x, %y
ret float %z
}
-define ptx_device float @t4(float %x) {
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: mul.f64 fd0, fd1, fd2
+; CHECK-NEXT: ret;
+ %z = fmul double %x, %y
+ ret double %z
+}
+
+define ptx_device float @t2_f32(float %x) {
; CHECK: mul.f32 f0, f1, 0F40A00000;
; CHECK-NEXT: ret;
%z = fmul float %x, 5.0
ret float %z
}
+
+define ptx_device double @t2_f64(double %x) {
+; CHECK: mul.f64 fd0, fd1, 0D4014000000000000;
+; CHECK-NEXT: ret;
+ %z = fmul double %x, 5.0
+ ret double %z
+}
Modified: llvm/trunk/test/CodeGen/PTX/options.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/options.ll?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/options.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/options.ll Tue Mar 1 21:20:28 2011
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=ptx -ptx-version=2.0 | grep ".version 2.0"
-; RUN: llc < %s -march=ptx -ptx-target=sm_20 | grep ".target sm_20"
+; RUN: llc < %s -march=ptx -mattr=ptx14 | grep ".version 1.4"
+; RUN: llc < %s -march=ptx -mattr=ptx20 | grep ".version 2.0"
+; RUN: llc < %s -march=ptx -mattr=ptx21 | grep ".version 2.1"
+; RUN: llc < %s -march=ptx -mattr=sm20 | grep ".target sm_20"
+; RUN: llc < %s -march=ptx -mattr=sm13 | grep ".target sm_13"
define ptx_device void @t1() {
ret void
Modified: llvm/trunk/test/CodeGen/PTX/st.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/st.ll?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/st.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/st.ll Tue Mar 1 21:20:28 2011
@@ -1,71 +1,382 @@
; RUN: llc < %s -march=ptx | FileCheck %s
-;CHECK: .extern .global .s32 array[];
-@array = external global [10 x i32]
+;CHECK: .extern .global .u16 array_i16[];
+@array_i16 = external global [10 x i16]
-;CHECK: .extern .const .s32 array_constant[];
-@array_constant = external addrspace(1) constant [10 x i32]
+;CHECK: .extern .const .u16 array_constant_i16[];
+@array_constant_i16 = external addrspace(1) constant [10 x i16]
-;CHECK: .extern .local .s32 array_local[];
-@array_local = external addrspace(2) global [10 x i32]
+;CHECK: .extern .local .u16 array_local_i16[];
+@array_local_i16 = external addrspace(2) global [10 x i16]
-;CHECK: .extern .shared .s32 array_shared[];
-@array_shared = external addrspace(4) global [10 x i32]
+;CHECK: .extern .shared .u16 array_shared_i16[];
+@array_shared_i16 = external addrspace(4) global [10 x i16]
-define ptx_device void @t1(i32* %p, i32 %x) {
+;CHECK: .extern .global .u32 array_i32[];
+@array_i32 = external global [10 x i32]
+
+;CHECK: .extern .const .u32 array_constant_i32[];
+@array_constant_i32 = external addrspace(1) constant [10 x i32]
+
+;CHECK: .extern .local .u32 array_local_i32[];
+@array_local_i32 = external addrspace(2) global [10 x i32]
+
+;CHECK: .extern .shared .u32 array_shared_i32[];
+@array_shared_i32 = external addrspace(4) global [10 x i32]
+
+;CHECK: .extern .global .u64 array_i64[];
+@array_i64 = external global [10 x i64]
+
+;CHECK: .extern .const .u64 array_constant_i64[];
+@array_constant_i64 = external addrspace(1) constant [10 x i64]
+
+;CHECK: .extern .local .u64 array_local_i64[];
+@array_local_i64 = external addrspace(2) global [10 x i64]
+
+;CHECK: .extern .shared .u64 array_shared_i64[];
+@array_shared_i64 = external addrspace(4) global [10 x i64]
+
+;CHECK: .extern .global .f32 array_float[];
+@array_float = external global [10 x float]
+
+;CHECK: .extern .const .f32 array_constant_float[];
+@array_constant_float = external addrspace(1) constant [10 x float]
+
+;CHECK: .extern .local .f32 array_local_float[];
+@array_local_float = external addrspace(2) global [10 x float]
+
+;CHECK: .extern .shared .f32 array_shared_float[];
+@array_shared_float = external addrspace(4) global [10 x float]
+
+;CHECK: .extern .global .f64 array_double[];
+@array_double = external global [10 x double]
+
+;CHECK: .extern .const .f64 array_constant_double[];
+@array_constant_double = external addrspace(1) constant [10 x double]
+
+;CHECK: .extern .local .f64 array_local_double[];
+@array_local_double = external addrspace(2) global [10 x double]
+
+;CHECK: .extern .shared .f64 array_shared_double[];
+@array_shared_double = external addrspace(4) global [10 x double]
+
+
+define ptx_device void @t1_u16(i16* %p, i16 %x) {
entry:
-;CHECK: st.global.s32 [r1], r2;
+;CHECK: st.global.u16 [r1], rh1;
+;CHECK-NEXT: ret;
+ store i16 %x, i16* %p
+ ret void
+}
+
+define ptx_device void @t1_u32(i32* %p, i32 %x) {
+entry:
+;CHECK: st.global.u32 [r1], r2;
+;CHECK-NEXT: ret;
store i32 %x, i32* %p
ret void
}
-define ptx_device void @t2(i32* %p, i32 %x) {
+define ptx_device void @t1_u64(i64* %p, i64 %x) {
+entry:
+;CHECK: st.global.u64 [r1], rd1;
+;CHECK-NEXT: ret;
+ store i64 %x, i64* %p
+ ret void
+}
+
+define ptx_device void @t1_f32(float* %p, float %x) {
+entry:
+;CHECK: st.global.f32 [r1], f1;
+;CHECK-NEXT: ret;
+ store float %x, float* %p
+ ret void
+}
+
+define ptx_device void @t1_f64(double* %p, double %x) {
+entry:
+;CHECK: st.global.f64 [r1], fd1;
+;CHECK-NEXT: ret;
+ store double %x, double* %p
+ ret void
+}
+
+define ptx_device void @t2_u16(i16* %p, i16 %x) {
entry:
-;CHECK: st.global.s32 [r1+4], r2;
+;CHECK: st.global.u16 [r1+2], rh1;
+;CHECK-NEXT: ret;
+ %i = getelementptr i16* %p, i32 1
+ store i16 %x, i16* %i
+ ret void
+}
+
+define ptx_device void @t2_u32(i32* %p, i32 %x) {
+entry:
+;CHECK: st.global.u32 [r1+4], r2;
+;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 1
store i32 %x, i32* %i
ret void
}
-define ptx_device void @t3(i32* %p, i32 %q, i32 %x) {
-;CHECK: .reg .s32 r0;
+define ptx_device void @t2_u64(i64* %p, i64 %x) {
+entry:
+;CHECK: st.global.u64 [r1+8], rd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr i64* %p, i32 1
+ store i64 %x, i64* %i
+ ret void
+}
+
+define ptx_device void @t2_f32(float* %p, float %x) {
+entry:
+;CHECK: st.global.f32 [r1+4], f1;
+;CHECK-NEXT: ret;
+ %i = getelementptr float* %p, i32 1
+ store float %x, float* %i
+ ret void
+}
+
+define ptx_device void @t2_f64(double* %p, double %x) {
+entry:
+;CHECK: st.global.f64 [r1+8], fd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr double* %p, i32 1
+ store double %x, double* %i
+ ret void
+}
+
+define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
+entry:
+;CHECK: shl.b32 r0, r2, 1;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.u16 [r0], rh1;
+;CHECK-NEXT: ret;
+ %i = getelementptr i16* %p, i32 %q
+ store i16 %x, i16* %i
+ ret void
+}
+
+define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
entry:
;CHECK: shl.b32 r0, r2, 2;
-;CHECK: add.s32 r0, r1, r0;
-;CHECK: st.global.s32 [r0], r3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.u32 [r0], r3;
+;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 %q
store i32 %x, i32* %i
ret void
}
-define ptx_device void @t4_global(i32 %x) {
+define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
+entry:
+;CHECK: shl.b32 r0, r2, 3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.u64 [r0], rd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr i64* %p, i32 %q
+ store i64 %x, i64* %i
+ ret void
+}
+
+define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
+entry:
+;CHECK: shl.b32 r0, r2, 2;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.f32 [r0], f1;
+;CHECK-NEXT: ret;
+ %i = getelementptr float* %p, i32 %q
+ store float %x, float* %i
+ ret void
+}
+
+define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
+entry:
+;CHECK: shl.b32 r0, r2, 3;
+;CHECK-NEXT: add.u32 r0, r1, r0;
+;CHECK-NEXT: st.global.f64 [r0], fd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr double* %p, i32 %q
+ store double %x, double* %i
+ ret void
+}
+
+define ptx_device void @t4_global_u16(i16 %x) {
+entry:
+;CHECK: st.global.u16 [array_i16], rh1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
+ store i16 %x, i16* %i
+ ret void
+}
+
+define ptx_device void @t4_global_u32(i32 %x) {
entry:
-;CHECK: st.global.s32 [array], r1;
- %i = getelementptr [10 x i32]* @array, i32 0, i32 0
+;CHECK: st.global.u32 [array_i32], r1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
store i32 %x, i32* %i
ret void
}
-define ptx_device void @t4_local(i32 %x) {
+define ptx_device void @t4_global_u64(i64 %x) {
+entry:
+;CHECK: st.global.u64 [array_i64], rd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
+ store i64 %x, i64* %i
+ ret void
+}
+
+define ptx_device void @t4_global_f32(float %x) {
+entry:
+;CHECK: st.global.f32 [array_float], f1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
+ store float %x, float* %i
+ ret void
+}
+
+define ptx_device void @t4_global_f64(double %x) {
+entry:
+;CHECK: st.global.f64 [array_double], fd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
+ store double %x, double* %i
+ ret void
+}
+
+define ptx_device void @t4_local_u16(i16 %x) {
entry:
-;CHECK: st.local.s32 [array_local], r1;
- %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
+;CHECK: st.local.u16 [array_local_i16], rh1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
+ store i16 %x, i16 addrspace(2)* %i
+ ret void
+}
+
+define ptx_device void @t4_local_u32(i32 %x) {
+entry:
+;CHECK: st.local.u32 [array_local_i32], r1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
store i32 %x, i32 addrspace(2)* %i
ret void
}
-define ptx_device void @t4_shared(i32 %x) {
+define ptx_device void @t4_local_u64(i64 %x) {
+entry:
+;CHECK: st.local.u64 [array_local_i64], rd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
+ store i64 %x, i64 addrspace(2)* %i
+ ret void
+}
+
+define ptx_device void @t4_local_f32(float %x) {
+entry:
+;CHECK: st.local.f32 [array_local_float], f1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
+ store float %x, float addrspace(2)* %i
+ ret void
+}
+
+define ptx_device void @t4_local_f64(double %x) {
+entry:
+;CHECK: st.local.f64 [array_local_double], fd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
+ store double %x, double addrspace(2)* %i
+ ret void
+}
+
+define ptx_device void @t4_shared_u16(i16 %x) {
entry:
-;CHECK: st.shared.s32 [array_shared], r1;
- %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
+;CHECK: st.shared.u16 [array_shared_i16], rh1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
+ store i16 %x, i16 addrspace(4)* %i
+ ret void
+}
+
+define ptx_device void @t4_shared_u32(i32 %x) {
+entry:
+;CHECK: st.shared.u32 [array_shared_i32], r1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
store i32 %x, i32 addrspace(4)* %i
ret void
}
-define ptx_device void @t5(i32 %x) {
+define ptx_device void @t4_shared_u64(i64 %x) {
+entry:
+;CHECK: st.shared.u64 [array_shared_i64], rd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
+ store i64 %x, i64 addrspace(4)* %i
+ ret void
+}
+
+define ptx_device void @t4_shared_f32(float %x) {
+entry:
+;CHECK: st.shared.f32 [array_shared_float], f1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
+ store float %x, float addrspace(4)* %i
+ ret void
+}
+
+define ptx_device void @t4_shared_f64(double %x) {
+entry:
+;CHECK: st.shared.f64 [array_shared_double], fd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
+ store double %x, double addrspace(4)* %i
+ ret void
+}
+
+define ptx_device void @t5_u16(i16 %x) {
entry:
-;CHECK: st.global.s32 [array+4], r1;
- %i = getelementptr [10 x i32]* @array, i32 0, i32 1
+;CHECK: st.global.u16 [array_i16+2], rh1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
+ store i16 %x, i16* %i
+ ret void
+}
+
+define ptx_device void @t5_u32(i32 %x) {
+entry:
+;CHECK: st.global.u32 [array_i32+4], r1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
store i32 %x, i32* %i
ret void
}
+
+define ptx_device void @t5_u64(i64 %x) {
+entry:
+;CHECK: st.global.u64 [array_i64+8], rd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
+ store i64 %x, i64* %i
+ ret void
+}
+
+define ptx_device void @t5_f32(float %x) {
+entry:
+;CHECK: st.global.f32 [array_float+4], f1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
+ store float %x, float* %i
+ ret void
+}
+
+define ptx_device void @t5_f64(double %x) {
+entry:
+;CHECK: st.global.f64 [array_double+8], fd1;
+;CHECK-NEXT: ret;
+ %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
+ store double %x, double* %i
+ ret void
+}
Removed: llvm/trunk/test/CodeGen/PTX/st_float.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/st_float.ll?rev=126823&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/st_float.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/st_float.ll (removed)
@@ -1,78 +0,0 @@
-; RUN: llc < %s -march=ptx | FileCheck %s
-
-;CHECK: .extern .global .f32 array[];
-@array = external global [10 x float]
-
-;CHECK: .extern .const .f32 array_constant[];
-@array_constant = external addrspace(1) constant [10 x float]
-
-;CHECK: .extern .local .f32 array_local[];
-@array_local = external addrspace(2) global [10 x float]
-
-;CHECK: .extern .shared .f32 array_shared[];
-@array_shared = external addrspace(4) global [10 x float]
-
-define ptx_device void @t1(float* %p, float %x) {
-entry:
-;CHECK: st.global.f32 [r1], f1;
-;CHECK-NEXT: ret;
- store float %x, float* %p
- ret void
-}
-
-define ptx_device void @t2(float* %p, float %x) {
-entry:
-;CHECK: st.global.f32 [r1+4], f1;
-;CHECK-NEXT: ret;
- %i = getelementptr float* %p, i32 1
- store float %x, float* %i
- ret void
-}
-
-define ptx_device void @t3(float* %p, i32 %q, float %x) {
-;CHECK: .reg .s32 r0;
-entry:
-;CHECK: shl.b32 r0, r2, 2;
-;CHECK-NEXT: add.s32 r0, r1, r0;
-;CHECK-NEXT: st.global.f32 [r0], f1;
-;CHECK-NEXT: ret;
- %i = getelementptr float* %p, i32 %q
- store float %x, float* %i
- ret void
-}
-
-define ptx_device void @t4_global(float %x) {
-entry:
-;CHECK: st.global.f32 [array], f1;
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float]* @array, i32 0, i32 0
- store float %x, float* %i
- ret void
-}
-
-define ptx_device void @t4_local(float %x) {
-entry:
-;CHECK: st.local.f32 [array_local], f1;
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
- store float %x, float addrspace(2)* %i
- ret void
-}
-
-define ptx_device void @t4_shared(float %x) {
-entry:
-;CHECK: st.shared.f32 [array_shared], f1;
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
- store float %x, float addrspace(4)* %i
- ret void
-}
-
-define ptx_device void @t5(float %x) {
-entry:
-;CHECK: st.global.f32 [array+4], f1;
-;CHECK-NEXT: ret;
- %i = getelementptr [10 x float]* @array, i32 0, i32 1
- store float %x, float* %i
- ret void
-}
Modified: llvm/trunk/test/CodeGen/PTX/sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/sub.ll?rev=126824&r1=126823&r2=126824&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/sub.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/sub.ll Tue Mar 1 21:20:28 2011
@@ -1,29 +1,71 @@
; RUN: llc < %s -march=ptx | FileCheck %s
-define ptx_device i32 @t1(i32 %x, i32 %y) {
-;CHECK: sub.s32 r0, r1, r2;
+define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
+; CHECK: sub.u16 rh0, rh1, rh2;
+; CHECK-NEXT: ret;
+ %z = sub i16 %x, %y
+ ret i16 %z
+}
+
+define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
+; CHECK: sub.u32 r0, r1, r2;
+; CHECK-NEXT: ret;
%z = sub i32 %x, %y
-;CHECK: ret;
ret i32 %z
}
-define ptx_device i32 @t2(i32 %x) {
-;CHECK: add.s32 r0, r1, -1;
- %z = sub i32 %x, 1
-;CHECK: ret;
- ret i32 %z
+define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
+; CHECK: sub.u64 rd0, rd1, rd2;
+; CHECK-NEXT: ret;
+ %z = sub i64 %x, %y
+ ret i64 %z
}
-define ptx_device float @t3(float %x, float %y) {
+define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: sub.f32 f0, f1, f2
; CHECK-NEXT: ret;
%z = fsub float %x, %y
ret float %z
}
-define ptx_device float @t4(float %x) {
+define ptx_device double @t1_f64(double %x, double %y) {
+; CHECK: sub.f64 fd0, fd1, fd2
+; CHECK-NEXT: ret;
+ %z = fsub double %x, %y
+ ret double %z
+}
+
+define ptx_device i16 @t2_u16(i16 %x) {
+; CHECK: add.u16 rh0, rh1, -1;
+; CHECK-NEXT: ret;
+ %z = sub i16 %x, 1
+ ret i16 %z
+}
+
+define ptx_device i32 @t2_u32(i32 %x) {
+; CHECK: add.u32 r0, r1, -1;
+; CHECK-NEXT: ret;
+ %z = sub i32 %x, 1
+ ret i32 %z
+}
+
+define ptx_device i64 @t2_u64(i64 %x) {
+; CHECK: add.u64 rd0, rd1, -1;
+; CHECK-NEXT: ret;
+ %z = sub i64 %x, 1
+ ret i64 %z
+}
+
+define ptx_device float @t2_f32(float %x) {
; CHECK: add.f32 f0, f1, 0FBF800000;
; CHECK-NEXT: ret;
%z = fsub float %x, 1.0
ret float %z
}
+
+define ptx_device double @t2_f64(double %x) {
+; CHECK: add.f64 fd0, fd1, 0DBFF0000000000000;
+; CHECK-NEXT: ret;
+ %z = fsub double %x, 1.0
+ ret double %z
+}
More information about the llvm-commits
mailing list