[llvm-commits] [llvm] r133814 - in /llvm/trunk: lib/Target/PTX/PTXAsmPrinter.cpp lib/Target/PTX/PTXCallingConv.td lib/Target/PTX/PTXISelLowering.cpp lib/Target/PTX/PTXInstrInfo.cpp lib/Target/PTX/PTXInstrInfo.td lib/Target/PTX/PTXRegisterInfo.td lib/Target/PTX/generate-register-td.py test/CodeGen/PTX/cvt.ll test/CodeGen/PTX/ld.ll test/CodeGen/PTX/mov.ll test/CodeGen/PTX/st.ll

Dan Bailey dan at dneg.com
Fri Jun 24 12:27:10 PDT 2011


Author: drb
Date: Fri Jun 24 14:27:10 2011
New Revision: 133814

URL: http://llvm.org/viewvc/llvm-project?rev=133814&view=rev
Log:
PTX: Add support for i8 type and introduce associated .b8 registers

The i8 type is required for boolean values, but i8 registers are only supported by the ld, st and mov instructions (plus the conversions added in this patch). The i1 type continues to be used for predicates.


Modified:
    llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp
    llvm/trunk/lib/Target/PTX/PTXCallingConv.td
    llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp
    llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp
    llvm/trunk/lib/Target/PTX/PTXInstrInfo.td
    llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td
    llvm/trunk/lib/Target/PTX/generate-register-td.py
    llvm/trunk/test/CodeGen/PTX/cvt.ll
    llvm/trunk/test/CodeGen/PTX/ld.ll
    llvm/trunk/test/CodeGen/PTX/mov.ll
    llvm/trunk/test/CodeGen/PTX/st.ll

Modified: llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXAsmPrinter.cpp Fri Jun 24 14:27:10 2011
@@ -92,6 +92,7 @@
 #define TEST_REGCLS(cls, clsstr)                \
   if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
   TEST_REGCLS(RegPred, pred);
+  TEST_REGCLS(RegI8,  b8);
   TEST_REGCLS(RegI16, b16);
   TEST_REGCLS(RegI32, b32);
   TEST_REGCLS(RegI64, b64);
@@ -124,6 +125,7 @@
       case Type::IntegerTyID:
         switch (type->getPrimitiveSizeInBits()) {
           default: llvm_unreachable("Unknown integer bit-width");
+          case 8:  return ".u8";
           case 16: return ".u16";
           case 32: return ".u32";
           case 64: return ".u64";

Modified: llvm/trunk/lib/Target/PTX/PTXCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXCallingConv.td?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXCallingConv.td (original)
+++ llvm/trunk/lib/Target/PTX/PTXCallingConv.td Fri Jun 24 14:27:10 2011
@@ -15,6 +15,7 @@
 // PTX Formal Parameter Calling Convention
 def CC_PTX : CallingConv<[
   CCIfType<[i1],      CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
+  CCIfType<[i8],      CCAssignToReg<[RQ12, RQ13, RQ14, RQ15, RQ16, RQ17, RQ18, RQ19, RQ20, RQ21, RQ22, RQ23, RQ24, RQ25, RQ26, RQ27, RQ28, RQ29, RQ30, RQ31, RQ32, RQ33, RQ34, RQ35, RQ36, RQ37, RQ38, RQ39, RQ40, RQ41, RQ42, RQ43, RQ44, RQ45, RQ46, RQ47, RQ48, RQ49, RQ50, RQ51, RQ52, RQ53, RQ54, RQ55, RQ56, RQ57, RQ58, RQ59, RQ60, RQ61, RQ62, RQ63, RQ64, RQ65, RQ66, RQ67, RQ68, RQ69, RQ70, RQ71, RQ72, RQ73, RQ74, RQ75, RQ76, RQ77, RQ78, RQ79, RQ80, RQ81, RQ82, RQ83, RQ84, RQ85, RQ86, RQ87, RQ88, RQ89, RQ90, RQ91, RQ92, RQ93, RQ94, RQ95, RQ96, RQ97, RQ98, RQ99, RQ100, RQ101, RQ102, RQ103, RQ104, RQ105, RQ106, RQ107, RQ108, RQ109, RQ110, RQ111, RQ112, RQ113, RQ114, RQ115, RQ116, RQ117, RQ118, RQ119, RQ120, RQ121, RQ122, RQ123, RQ124, RQ125, RQ126, RQ127]>>,
   CCIfType<[i16],     CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
   CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
   CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
@@ -23,6 +24,7 @@
 // PTX Return Value Calling Convention
 def RetCC_PTX : CallingConv<[
   CCIfType<[i1],      CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
+  CCIfType<[i8],      CCAssignToReg<[RQ0, RQ1, RQ2, RQ3, RQ4, RQ5, RQ6, RQ7, RQ8, RQ9, RQ10, RQ11]>>,
   CCIfType<[i16],     CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
   CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
   CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>

Modified: llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXISelLowering.cpp Fri Jun 24 14:27:10 2011
@@ -40,6 +40,7 @@
   : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
   // Set up the register classes.
   addRegisterClass(MVT::i1,  PTX::RegPredRegisterClass);
+  addRegisterClass(MVT::i8,  PTX::RegI8RegisterClass);
   addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
   addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
   addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
@@ -52,10 +53,20 @@
 
   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
+  
+  // Promote i1 type
+  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+  
+  setTruncStoreAction(MVT::i8, MVT::i1, Promote);
+  
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+  
   // Turn i16 (z)extload into load + (z)extend
   setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
 
   // Turn f32 extload into load + fextend
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
@@ -176,6 +187,7 @@
   bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
 } argmap[] = {
   argmap_entry(MVT::i1,  PTX::RegPredRegisterClass),
+  argmap_entry(MVT::i8,  PTX::RegI8RegisterClass),
   argmap_entry(MVT::i16, PTX::RegI16RegisterClass),
   argmap_entry(MVT::i32, PTX::RegI32RegisterClass),
   argmap_entry(MVT::i64, PTX::RegI64RegisterClass),
@@ -252,6 +264,9 @@
       if (RegVT == MVT::i1) {
         TRC = PTX::RegPredRegisterClass;
       }
+      else if (RegVT == MVT::i8) {
+        TRC = PTX::RegI8RegisterClass;
+      }
       else if (RegVT == MVT::i16) {
         TRC = PTX::RegI16RegisterClass;
       }

Modified: llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PTX/PTXInstrInfo.cpp Fri Jun 24 14:27:10 2011
@@ -33,6 +33,7 @@
   const TargetRegisterClass *cls;
   const int opcode;
 } map[] = {
+  { &PTX::RegI8RegClass,  PTX::MOVU8rr },
   { &PTX::RegI16RegClass, PTX::MOVU16rr },
   { &PTX::RegI32RegClass, PTX::MOVU32rr },
   { &PTX::RegI64RegClass, PTX::MOVU64rr },
@@ -302,7 +303,9 @@
   int OpCode;
 
   // Select the appropriate opcode based on the register class
-  if (RC == PTX::RegI16RegisterClass) {
+  if (RC == PTX::RegI8RegisterClass) {
+    OpCode = PTX::STACKSTOREI8;
+  } else if (RC == PTX::RegI16RegisterClass) {
     OpCode = PTX::STACKSTOREI16;
   }  else if (RC == PTX::RegI32RegisterClass) {
     OpCode = PTX::STACKSTOREI32;
@@ -337,7 +340,9 @@
   int OpCode;
 
   // Select the appropriate opcode based on the register class
-  if (RC == PTX::RegI16RegisterClass) {
+  if (RC == PTX::RegI8RegisterClass) {
+    OpCode = PTX::STACKLOADI8;
+  } else if (RC == PTX::RegI16RegisterClass) {
     OpCode = PTX::STACKLOADI16;
   } else if (RC == PTX::RegI32RegisterClass) {
     OpCode = PTX::STACKLOADI32;

Modified: llvm/trunk/lib/Target/PTX/PTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXInstrInfo.td?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PTX/PTXInstrInfo.td Fri Jun 24 14:27:10 2011
@@ -537,6 +537,7 @@
 }
 
 multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+  defm u8  : PTX_LD<opstr, ".u8",  RegI8,  pat_load>;
   defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
   defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
   defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
@@ -572,6 +573,7 @@
 }
 
 multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+  defm u8  : PTX_ST<opstr, ".u8",  RegI8,  pat_store>;
   defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
   defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
   defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
@@ -783,22 +785,27 @@
 let neverHasSideEffects = 1 in {
   def MOVPREDrr
     : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
+  def MOVU8rr
+    : InstPTX<(outs RegI8:$d),   (ins RegI8:$a),   "mov.u8\t$d, $a",  []>;
   def MOVU16rr
-    : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
+    : InstPTX<(outs RegI16:$d),  (ins RegI16:$a),  "mov.u16\t$d, $a", []>;
   def MOVU32rr
-    : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
+    : InstPTX<(outs RegI32:$d),  (ins RegI32:$a),  "mov.u32\t$d, $a", []>;
   def MOVU64rr
-    : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
+    : InstPTX<(outs RegI64:$d),  (ins RegI64:$a),  "mov.u64\t$d, $a", []>;
   def MOVF32rr
-    : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
+    : InstPTX<(outs RegF32:$d),  (ins RegF32:$a),  "mov.f32\t$d, $a", []>;
   def MOVF64rr
-    : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
+    : InstPTX<(outs RegF64:$d),  (ins RegF64:$a),  "mov.f64\t$d, $a", []>;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
   def MOVPREDri
     : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
               [(set RegPred:$d, imm:$a)]>;
+  def MOVU8ri
+    : InstPTX<(outs RegI8:$d),  (ins i8imm:$a),  "mov.u8\t$d, $a",
+              [(set RegI8:$d, imm:$a)]>;
   def MOVU16ri
     : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
               [(set RegI16:$d, imm:$a)]>;
@@ -838,6 +845,9 @@
   def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
                          "ld.param.pred\t$d, [$a]",
                          [(set RegPred:$d, (PTXloadparam timm:$a))]>;
+  def LDpiU8   : InstPTX<(outs RegI8:$d), (ins MEMpi:$a),
+                         "ld.param.u8\t$d, [$a]",
+                         [(set RegI8:$d,  (PTXloadparam timm:$a))]>;
   def LDpiU16  : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
                          "ld.param.u16\t$d, [$a]",
                          [(set RegI16:$d, (PTXloadparam timm:$a))]>;
@@ -857,6 +867,9 @@
   def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
                          "st.param.pred\t[$d], $a",
                          [(PTXstoreparam timm:$d, RegPred:$a)]>;
+  def STpiU8   : InstPTX<(outs), (ins MEMret:$d, RegI8:$a),
+                         "st.param.u8\t[$d], $a",
+                         [(PTXstoreparam timm:$d, RegI8:$a)]>;
   def STpiU16  : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
                          "st.param.u16\t[$d], $a",
                          [(PTXstoreparam timm:$d, RegI16:$a)]>;
@@ -887,6 +900,10 @@
 // PTX does not directly support converting to a predicate type, so we fake it
 // by performing a greater-than test between the value and zero.  This follows
 // the C convention that any non-zero value is equivalent to 'true'.
+def CVT_pred_u8
+  : InstPTX<(outs RegPred:$d), (ins RegI8:$a), "setp.gt.b8\t$d, $a, 0",
+            [(set RegPred:$d, (trunc RegI8:$a))]>;
+
 def CVT_pred_u16
   : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.b16\t$d, $a, 0",
             [(set RegPred:$d, (trunc RegI16:$a))]>;
@@ -907,6 +924,34 @@
   : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.b64\t$d, $a, 0",
             [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
 
+// Conversion to u8
+// PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
+def CVT_u8_pred
+  : InstPTX<(outs RegI8:$d), (ins RegPred:$a), "selp.u8\t$d, 1, 0, $a",
+            [(set RegI8:$d, (zext RegPred:$a))]>;
+
+def CVT_u8_preds  // sext i1 -> i8: a true predicate must yield all ones (-1), not 1
+  : InstPTX<(outs RegI8:$d), (ins RegPred:$a), "selp.u8\t$d, -1, 0, $a",
+            [(set RegI8:$d, (sext RegPred:$a))]>;
+
+def CVT_u8_u32
+  : InstPTX<(outs RegI8:$d), (ins RegI32:$a), "cvt.u8.u32\t$d, $a",
+            [(set RegI8:$d, (trunc RegI32:$a))]>;
+
+def CVT_u8_u64
+  : InstPTX<(outs RegI8:$d), (ins RegI64:$a), "cvt.u8.u64\t$d, $a",
+            [(set RegI8:$d, (trunc RegI64:$a))]>;
+
+def CVT_u8_f32
+  : InstPTX<(outs RegI8:$d), (ins RegF32:$a), "cvt.rzi.u8.f32\t$d, $a",
+            [(set RegI8:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u8_f64
+  : InstPTX<(outs RegI8:$d), (ins RegF64:$a), "cvt.rzi.u8.f64\t$d, $a",
+            [(set RegI8:$d, (fp_to_uint RegF64:$a))]>;
+
 // Conversion to u16
 // PTX does not directly support converting a predicate to a value, so we
 // use a select instruction to select either 0 or 1 (integer or fp) based
@@ -915,6 +960,18 @@
   : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
             [(set RegI16:$d, (zext RegPred:$a))]>;
 
+def CVT_u16_preds  // sext i1 -> i16: a true predicate must yield all ones (-1), not 1
+  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, -1, 0, $a",
+            [(set RegI16:$d, (sext RegPred:$a))]>;
+
+def CVT_u16_u8
+  : InstPTX<(outs RegI16:$d), (ins RegI8:$a), "cvt.u16.u8\t$d, $a",
+            [(set RegI16:$d, (zext RegI8:$a))]>;
+
+def CVT_u16_s8
+  : InstPTX<(outs RegI16:$d), (ins RegI8:$a), "cvt.u16.s8\t$d, $a",
+            [(set RegI16:$d, (sext RegI8:$a))]>;
+
 def CVT_u16_u32
   : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
             [(set RegI16:$d, (trunc RegI32:$a))]>;
@@ -937,10 +994,26 @@
   : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
             [(set RegI32:$d, (zext RegPred:$a))]>;
 
+def CVT_u32_u8
+  : InstPTX<(outs RegI32:$d), (ins RegI8:$a), "cvt.u32.u8\t$d, $a",
+            [(set RegI32:$d, (zext RegI8:$a))]>;
+
 def CVT_u32_u16
   : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
             [(set RegI32:$d, (zext RegI16:$a))]>;
 
+def CVT_u32_preds  // sext i1 -> i32: a true predicate must yield all ones (-1), not 1
+  : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, -1, 0, $a",
+            [(set RegI32:$d, (sext RegPred:$a))]>;
+
+def CVT_u32_s8  // signed-source convert: matches sext (zext is handled by CVT_u32_u8)
+  : InstPTX<(outs RegI32:$d), (ins RegI8:$a), "cvt.u32.s8\t$d, $a",
+            [(set RegI32:$d, (sext RegI8:$a))]>;
+
+def CVT_u32_s16
+  : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
+            [(set RegI32:$d, (sext RegI16:$a))]>;
+
 def CVT_u32_u64
   : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
             [(set RegI32:$d, (trunc RegI64:$a))]>;
@@ -959,6 +1032,10 @@
   : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
             [(set RegI64:$d, (zext RegPred:$a))]>;
 
+def CVT_u64_u8
+  : InstPTX<(outs RegI64:$d), (ins RegI8:$a), "cvt.u64.u8\t$d, $a",
+            [(set RegI64:$d, (zext RegI8:$a))]>;
+
 def CVT_u64_u16
   : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
             [(set RegI64:$d, (zext RegI16:$a))]>;
@@ -967,6 +1044,22 @@
   : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
             [(set RegI64:$d, (zext RegI32:$a))]>;
 
+def CVT_u64_preds  // sext i1 -> i64: a true predicate must yield all ones (-1), not 1
+  : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, -1, 0, $a",
+            [(set RegI64:$d, (sext RegPred:$a))]>;
+
+def CVT_u64_s8  // signed-source convert: matches sext (zext is handled by CVT_u64_u8)
+  : InstPTX<(outs RegI64:$d), (ins RegI8:$a), "cvt.u64.s8\t$d, $a",
+            [(set RegI64:$d, (sext RegI8:$a))]>;
+
+def CVT_u64_s16
+  : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
+            [(set RegI64:$d, (sext RegI16:$a))]>;
+
+def CVT_u64_s32
+  : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
+            [(set RegI64:$d, (sext RegI32:$a))]>;
+
 def CVT_u64_f32
   : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
             [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
@@ -982,6 +1075,10 @@
             "selp.f32\t$d, 0F3F800000, 0F00000000, $a",  // 1.0
             [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
 
+def CVT_f32_u8
+  : InstPTX<(outs RegF32:$d), (ins RegI8:$a), "cvt.rn.f32.u8\t$d, $a",
+            [(set RegF32:$d, (uint_to_fp RegI8:$a))]>;
+
 def CVT_f32_u16
   : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
             [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
@@ -1005,6 +1102,10 @@
             "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a",  // 1.0
             [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
 
+def CVT_f64_u8
+  : InstPTX<(outs RegF64:$d), (ins RegI8:$a), "cvt.rn.f64.u8\t$d, $a",
+            [(set RegF64:$d, (uint_to_fp RegI8:$a))]>;
+
 def CVT_f64_u16
   : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
             [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
@@ -1043,6 +1144,8 @@
 
 ///===- Spill Instructions ------------------------------------------------===//
 // Special instructions used for stack spilling
+def STACKSTOREI8  : InstPTX<(outs), (ins i32imm:$d, RegI8:$a),
+                            "mov.u8\ts$d, $a", []>;
 def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
                             "mov.u16\ts$d, $a", []>;
 def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
@@ -1054,6 +1157,8 @@
 def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
                             "mov.f64\ts$d, $a", []>;
 
+def STACKLOADI8  : InstPTX<(outs), (ins RegI8:$d, i32imm:$a),
+                           "mov.u8\t$d, s$a", []>;
 def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
                            "mov.u16\t$d, s$a", []>;
 def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),

Modified: llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/PTX/PTXRegisterInfo.td Fri Jun 24 14:27:10 2011
@@ -151,6 +151,137 @@
 def P126 : PTXReg<"p126">;
 def P127 : PTXReg<"p127">;
 
+///===- 8-Bit Registers --------------------------------------------------===//
+
+def RQ0 : PTXReg<"rq0">;
+def RQ1 : PTXReg<"rq1">;
+def RQ2 : PTXReg<"rq2">;
+def RQ3 : PTXReg<"rq3">;
+def RQ4 : PTXReg<"rq4">;
+def RQ5 : PTXReg<"rq5">;
+def RQ6 : PTXReg<"rq6">;
+def RQ7 : PTXReg<"rq7">;
+def RQ8 : PTXReg<"rq8">;
+def RQ9 : PTXReg<"rq9">;
+def RQ10 : PTXReg<"rq10">;
+def RQ11 : PTXReg<"rq11">;
+def RQ12 : PTXReg<"rq12">;
+def RQ13 : PTXReg<"rq13">;
+def RQ14 : PTXReg<"rq14">;
+def RQ15 : PTXReg<"rq15">;
+def RQ16 : PTXReg<"rq16">;
+def RQ17 : PTXReg<"rq17">;
+def RQ18 : PTXReg<"rq18">;
+def RQ19 : PTXReg<"rq19">;
+def RQ20 : PTXReg<"rq20">;
+def RQ21 : PTXReg<"rq21">;
+def RQ22 : PTXReg<"rq22">;
+def RQ23 : PTXReg<"rq23">;
+def RQ24 : PTXReg<"rq24">;
+def RQ25 : PTXReg<"rq25">;
+def RQ26 : PTXReg<"rq26">;
+def RQ27 : PTXReg<"rq27">;
+def RQ28 : PTXReg<"rq28">;
+def RQ29 : PTXReg<"rq29">;
+def RQ30 : PTXReg<"rq30">;
+def RQ31 : PTXReg<"rq31">;
+def RQ32 : PTXReg<"rq32">;
+def RQ33 : PTXReg<"rq33">;
+def RQ34 : PTXReg<"rq34">;
+def RQ35 : PTXReg<"rq35">;
+def RQ36 : PTXReg<"rq36">;
+def RQ37 : PTXReg<"rq37">;
+def RQ38 : PTXReg<"rq38">;
+def RQ39 : PTXReg<"rq39">;
+def RQ40 : PTXReg<"rq40">;
+def RQ41 : PTXReg<"rq41">;
+def RQ42 : PTXReg<"rq42">;
+def RQ43 : PTXReg<"rq43">;
+def RQ44 : PTXReg<"rq44">;
+def RQ45 : PTXReg<"rq45">;
+def RQ46 : PTXReg<"rq46">;
+def RQ47 : PTXReg<"rq47">;
+def RQ48 : PTXReg<"rq48">;
+def RQ49 : PTXReg<"rq49">;
+def RQ50 : PTXReg<"rq50">;
+def RQ51 : PTXReg<"rq51">;
+def RQ52 : PTXReg<"rq52">;
+def RQ53 : PTXReg<"rq53">;
+def RQ54 : PTXReg<"rq54">;
+def RQ55 : PTXReg<"rq55">;
+def RQ56 : PTXReg<"rq56">;
+def RQ57 : PTXReg<"rq57">;
+def RQ58 : PTXReg<"rq58">;
+def RQ59 : PTXReg<"rq59">;
+def RQ60 : PTXReg<"rq60">;
+def RQ61 : PTXReg<"rq61">;
+def RQ62 : PTXReg<"rq62">;
+def RQ63 : PTXReg<"rq63">;
+def RQ64 : PTXReg<"rq64">;
+def RQ65 : PTXReg<"rq65">;
+def RQ66 : PTXReg<"rq66">;
+def RQ67 : PTXReg<"rq67">;
+def RQ68 : PTXReg<"rq68">;
+def RQ69 : PTXReg<"rq69">;
+def RQ70 : PTXReg<"rq70">;
+def RQ71 : PTXReg<"rq71">;
+def RQ72 : PTXReg<"rq72">;
+def RQ73 : PTXReg<"rq73">;
+def RQ74 : PTXReg<"rq74">;
+def RQ75 : PTXReg<"rq75">;
+def RQ76 : PTXReg<"rq76">;
+def RQ77 : PTXReg<"rq77">;
+def RQ78 : PTXReg<"rq78">;
+def RQ79 : PTXReg<"rq79">;
+def RQ80 : PTXReg<"rq80">;
+def RQ81 : PTXReg<"rq81">;
+def RQ82 : PTXReg<"rq82">;
+def RQ83 : PTXReg<"rq83">;
+def RQ84 : PTXReg<"rq84">;
+def RQ85 : PTXReg<"rq85">;
+def RQ86 : PTXReg<"rq86">;
+def RQ87 : PTXReg<"rq87">;
+def RQ88 : PTXReg<"rq88">;
+def RQ89 : PTXReg<"rq89">;
+def RQ90 : PTXReg<"rq90">;
+def RQ91 : PTXReg<"rq91">;
+def RQ92 : PTXReg<"rq92">;
+def RQ93 : PTXReg<"rq93">;
+def RQ94 : PTXReg<"rq94">;
+def RQ95 : PTXReg<"rq95">;
+def RQ96 : PTXReg<"rq96">;
+def RQ97 : PTXReg<"rq97">;
+def RQ98 : PTXReg<"rq98">;
+def RQ99 : PTXReg<"rq99">;
+def RQ100 : PTXReg<"rq100">;
+def RQ101 : PTXReg<"rq101">;
+def RQ102 : PTXReg<"rq102">;
+def RQ103 : PTXReg<"rq103">;
+def RQ104 : PTXReg<"rq104">;
+def RQ105 : PTXReg<"rq105">;
+def RQ106 : PTXReg<"rq106">;
+def RQ107 : PTXReg<"rq107">;
+def RQ108 : PTXReg<"rq108">;
+def RQ109 : PTXReg<"rq109">;
+def RQ110 : PTXReg<"rq110">;
+def RQ111 : PTXReg<"rq111">;
+def RQ112 : PTXReg<"rq112">;
+def RQ113 : PTXReg<"rq113">;
+def RQ114 : PTXReg<"rq114">;
+def RQ115 : PTXReg<"rq115">;
+def RQ116 : PTXReg<"rq116">;
+def RQ117 : PTXReg<"rq117">;
+def RQ118 : PTXReg<"rq118">;
+def RQ119 : PTXReg<"rq119">;
+def RQ120 : PTXReg<"rq120">;
+def RQ121 : PTXReg<"rq121">;
+def RQ122 : PTXReg<"rq122">;
+def RQ123 : PTXReg<"rq123">;
+def RQ124 : PTXReg<"rq124">;
+def RQ125 : PTXReg<"rq125">;
+def RQ126 : PTXReg<"rq126">;
+def RQ127 : PTXReg<"rq127">;
+
 ///===- 16-Bit Registers --------------------------------------------------===//
 
 def RH0 : PTXReg<"rh0">;
@@ -548,6 +679,7 @@
 //  Register classes
 //===----------------------------------------------------------------------===//
 def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
+def RegI8  : RegisterClass<"PTX", [i8],  8, (sequence "RQ%u", 0, 127)>;
 def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
 def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
 def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;

Modified: llvm/trunk/lib/Target/PTX/generate-register-td.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PTX/generate-register-td.py?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PTX/generate-register-td.py (original)
+++ llvm/trunk/lib/Target/PTX/generate-register-td.py Fri Jun 24 14:27:10 2011
@@ -15,15 +15,16 @@
 from sys import argv, exit, stdout
 
 
-if len(argv) != 5:
-    print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
+if len(argv) != 6:
+    print('Usage: generate-register-td.py <num_preds> <num_8> <num_16> <num_32> <num_64>')
     exit(1)
 
 try:
     num_pred  = int(argv[1])
-    num_16bit = int(argv[2])
-    num_32bit = int(argv[3])
-    num_64bit = int(argv[4])
+    num_8bit  = int(argv[2])
+    num_16bit = int(argv[3])
+    num_32bit = int(argv[4])
+    num_64bit = int(argv[5])
 except:
     print('ERROR: Invalid integer parameter')
     exit(1)
@@ -60,6 +61,11 @@
 for r in range(0, num_pred):
     td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
 
+# Print 8-bit registers
+td_file.write('\n///===- 8-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_8bit):
+    td_file.write('def RQ%d : PTXReg<"rq%d">;\n' % (r, r))
+
 # Print 16-bit registers
 td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
 for r in range(0, num_16bit):
@@ -86,6 +92,7 @@
 # Print register classes
 
 td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
+td_file.write('def RegI8  : RegisterClass<"PTX", [i8],  8, (sequence "RQ%%u", 0, %d)>;\n' % (num_8bit-1))
 td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
 td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
 td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
@@ -101,16 +108,20 @@
 # Reserve 10% of the available registers for return values, and the other 90%
 # for parameters
 num_ret_pred    = int(0.1 * num_pred)
+num_ret_8bit    = int(0.1 * num_8bit)
 num_ret_16bit   = int(0.1 * num_16bit)
 num_ret_32bit   = int(0.1 * num_32bit)
 num_ret_64bit   = int(0.1 * num_64bit)
 num_param_pred  = num_pred - num_ret_pred
+num_param_8bit = num_8bit - num_ret_8bit
 num_param_16bit = num_16bit - num_ret_16bit
 num_param_32bit = num_32bit - num_ret_32bit
 num_param_64bit = num_64bit - num_ret_64bit
 
 param_regs_pred  = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
 ret_regs_pred    = ['P%d' % i for i in range(0, num_ret_pred)]
+param_regs_8bit  = [('RQ%d' % (i+num_ret_8bit)) for i in range(0, num_param_8bit)]
+ret_regs_8bit    = ['RQ%d' % i for i in range(0, num_ret_8bit)]
 param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
 ret_regs_16bit   = ['RH%d' % i for i in range(0, num_ret_16bit)]
 param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
@@ -120,6 +131,8 @@
 
 param_list_pred  = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred)
 ret_list_pred    = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred)
+param_list_8bit  = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_8bit)
+ret_list_8bit    = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_8bit)
 param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit)
 ret_list_16bit   = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit)
 param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit)
@@ -144,6 +157,7 @@
 // PTX Formal Parameter Calling Convention
 def CC_PTX : CallingConv<[
   CCIfType<[i1],      CCAssignToReg<[%s]>>,
+  CCIfType<[i8],      CCAssignToReg<[%s]>>,
   CCIfType<[i16],     CCAssignToReg<[%s]>>,
   CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
   CCIfType<[i64,f64], CCAssignToReg<[%s]>>
@@ -152,12 +166,13 @@
 // PTX Return Value Calling Convention
 def RetCC_PTX : CallingConv<[
   CCIfType<[i1],      CCAssignToReg<[%s]>>,
+  CCIfType<[i8],      CCAssignToReg<[%s]>>,
   CCIfType<[i16],     CCAssignToReg<[%s]>>,
   CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
   CCIfType<[i64,f64], CCAssignToReg<[%s]>>
 ]>;
-''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
-       ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
+''' % (param_list_pred, param_list_8bit, param_list_16bit, param_list_32bit, param_list_64bit,
+       ret_list_pred, ret_list_8bit, ret_list_16bit, ret_list_32bit, ret_list_64bit))
 
 
 td_file.close()

Modified: llvm/trunk/test/CodeGen/PTX/cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/cvt.ll?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/cvt.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/cvt.ll Fri Jun 24 14:27:10 2011
@@ -3,6 +3,17 @@
 ; preds 
 ; (note: we convert back to i32 to return)
 
+define ptx_device i32 @cvt_pred_i8(i8 %x, i1 %y) {
+; CHECK: setp.gt.b8 p[[P0:[0-9]+]], rq{{[0-9]+}}, 0
+; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
+; CHECK-NEXT: ret;
+	%a = trunc i8 %x to i1
+	%b = and i1 %a, %y
+	%c = zext i1 %b to i32
+	ret i32 %c
+}
+
 define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
 ; CHECK: setp.gt.b16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0
 ; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
@@ -58,6 +69,43 @@
 	ret i32 %c
 }
 
+; i8
+
+define ptx_device i8 @cvt_i8_preds(i1 %x) {
+; CHECK: selp.u8 rq{{[0-9]+}}, 1, 0, p{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = zext i1 %x to i8
+	ret i8 %a
+}
+
+define ptx_device i8 @cvt_i8_i32(i32 %x) {
+; CHECK: cvt.u8.u32 rq{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = trunc i32 %x to i8
+	ret i8 %a
+}
+
+define ptx_device i8 @cvt_i8_i64(i64 %x) {
+; CHECK: cvt.u8.u64 rq{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = trunc i64 %x to i8
+	ret i8 %a
+}
+
+define ptx_device i8 @cvt_i8_f32(float %x) {
+; CHECK: cvt.rzi.u8.f32 rq{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = fptoui float %x to i8
+	ret i8 %a
+}
+
+define ptx_device i8 @cvt_i8_f64(double %x) {
+; CHECK: cvt.rzi.u8.f64 rq{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = fptoui double %x to i8
+	ret i8 %a
+}
+
 ; i16
 
 define ptx_device i16 @cvt_i16_preds(i1 %x) {
@@ -67,6 +115,13 @@
 	ret i16 %a
 }
 
+define ptx_device i16 @cvt_i16_i8(i8 %x) {
+; CHECK: cvt.u16.u8 rh{{[0-9]+}}, rq{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = zext i8 %x to i16
+	ret i16 %a
+}
+
 define ptx_device i16 @cvt_i16_i32(i32 %x) {
 ; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -104,6 +159,13 @@
 	ret i32 %a
 }
 
+define ptx_device i32 @cvt_i32_i8(i8 %x) {
+; CHECK: cvt.u32.u8 r{{[0-9]+}}, rq{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = zext i8 %x to i32
+	ret i32 %a
+}
+
 define ptx_device i32 @cvt_i32_i16(i16 %x) {
 ; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -141,6 +203,13 @@
 	ret i64 %a
 }
 
+define ptx_device i64 @cvt_i64_i8(i8 %x) {
+; CHECK: cvt.u64.u8 rd{{[0-9]+}}, rq{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = zext i8 %x to i64
+	ret i64 %a
+}
+
 define ptx_device i64 @cvt_i64_i16(i16 %x) {
 ; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -178,6 +247,13 @@
 	ret float %a
 }
 
+define ptx_device float @cvt_f32_i8(i8 %x) {
+; CHECK: cvt.rn.f32.u8 r{{[0-9]+}}, rq{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = uitofp i8 %x to float
+	ret float %a
+}
+
 define ptx_device float @cvt_f32_i16(i16 %x) {
 ; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
 ; CHECK-NEXT: ret;
@@ -215,6 +291,13 @@
 	ret double %a
 }
 
+define ptx_device double @cvt_f64_i8(i8 %x) {
+; CHECK: cvt.rn.f64.u8 rd{{[0-9]+}}, rq{{[0-9]+}};
+; CHECK-NEXT: ret;
+	%a = uitofp i8 %x to double
+	ret double %a
+}
+
 define ptx_device double @cvt_f64_i16(i16 %x) {
 ; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
 ; CHECK-NEXT: ret;

Modified: llvm/trunk/test/CodeGen/PTX/ld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/ld.ll?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/ld.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/ld.ll Fri Jun 24 14:27:10 2011
@@ -1,5 +1,17 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
+;CHECK: .extern .global .b8 array_i8[10];
+@array_i8 = external global [10 x i8]
+
+;CHECK: .extern .const .b8 array_constant_i8[10];
+@array_constant_i8 = external addrspace(1) constant [10 x i8]
+
+;CHECK: .extern .local .b8 array_local_i8[10];
+@array_local_i8 = external addrspace(2) global [10 x i8]
+
+;CHECK: .extern .shared .b8 array_shared_i8[10];
+@array_shared_i8 = external addrspace(4) global [10 x i8]
+
 ;CHECK: .extern .global .b8 array_i16[20];
 @array_i16 = external global [10 x i16]
 
@@ -60,6 +72,13 @@
 ;CHECK: .extern .shared .b8 array_shared_double[80];
 @array_shared_double = external addrspace(4) global [10 x double]
 
+define ptx_device i8 @t1_u8(i8* %p) {
+entry:
+;CHECK: ld.global.u8 rq{{[0-9]+}}, [r{{[0-9]+}}];
+;CHECK-NEXT: ret;
+  %x = load i8* %p
+  ret i8 %x
+}
 
 define ptx_device i16 @t1_u16(i16* %p) {
 entry:
@@ -101,6 +120,15 @@
   ret double %x
 }
 
+define ptx_device i8 @t2_u8(i8* %p) {
+entry:
+;CHECK: ld.global.u8 rq{{[0-9]+}}, [r{{[0-9]+}}+1];
+;CHECK-NEXT: ret;
+  %i = getelementptr i8* %p, i32 1
+  %x = load i8* %i
+  ret i8 %x
+}
+
 define ptx_device i16 @t2_u16(i16* %p) {
 entry:
 ;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2];
@@ -146,6 +174,15 @@
   ret double %x
 }
 
+define ptx_device i8 @t3_u8(i8* %p, i32 %q) {
+entry:
+;CHECK: add.u32 r[[R0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]];
+  %i = getelementptr i8* %p, i32 %q
+  %x = load i8* %i
+  ret i8 %x
+}
+
 define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
 entry:
 ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
@@ -196,6 +233,16 @@
   ret double %x
 }
 
+define ptx_device i8 @t4_global_u8() {
+entry:
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
+;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i8]* @array_i8, i32 0, i32 0
+  %x = load i8* %i
+  ret i8 %x
+}
+
 define ptx_device i16 @t4_global_u16() {
 entry:
 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
@@ -296,6 +343,16 @@
   ret double %x
 }
 
+define ptx_device i8 @t4_local_u8() {
+entry:
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i8;
+;CHECK-NEXT: ld.local.u8 rq{{[0-9]+}}, [r[[R0]]];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i8] addrspace(2)* @array_local_i8, i32 0, i32 0
+  %x = load i8 addrspace(2)* %i
+  ret i8 %x
+}
+
 define ptx_device i16 @t4_local_u16() {
 entry:
 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
@@ -346,6 +403,16 @@
   ret double %x
 }
 
+define ptx_device i8 @t4_shared_u8() {
+entry:
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i8;
+;CHECK-NEXT: ld.shared.u8 rq{{[0-9]+}}, [r[[R0]]];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i8] addrspace(4)* @array_shared_i8, i32 0, i32 0
+  %x = load i8 addrspace(4)* %i
+  ret i8 %x
+}
+
 define ptx_device i16 @t4_shared_u16() {
 entry:
 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
@@ -396,6 +463,16 @@
   ret double %x
 }
 
+define ptx_device i8 @t5_u8() {
+entry:
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
+;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]+1];
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i8]* @array_i8, i32 0, i32 1
+  %x = load i8* %i
+  ret i8 %x
+}
+
 define ptx_device i16 @t5_u16() {
 entry:
 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;

Modified: llvm/trunk/test/CodeGen/PTX/mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/mov.ll?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/mov.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/mov.ll Fri Jun 24 14:27:10 2011
@@ -1,5 +1,11 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
+define ptx_device i8 @t1_u8() {
+; CHECK: mov.u8 rq{{[0-9]+}}, 0;
+; CHECK: ret;
+	ret i8 0
+}
+
 define ptx_device i16 @t1_u16() {
 ; CHECK: mov.u16 rh{{[0-9]+}}, 0;
 ; CHECK: ret;
@@ -30,6 +36,12 @@
 	ret double 0.0
 }
 
+define ptx_device i8 @t2_u8(i8 %x) {
+; CHECK: mov.u8 rq{{[0-9]+}}, rq{{[0-9]+}};
+; CHECK: ret;
+	ret i8 %x
+}
+
 define ptx_device i16 @t2_u16(i16 %x) {
 ; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}};
 ; CHECK: ret;

Modified: llvm/trunk/test/CodeGen/PTX/st.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PTX/st.ll?rev=133814&r1=133813&r2=133814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PTX/st.ll (original)
+++ llvm/trunk/test/CodeGen/PTX/st.ll Fri Jun 24 14:27:10 2011
@@ -1,5 +1,17 @@
 ; RUN: llc < %s -march=ptx32 | FileCheck %s
 
+;CHECK: .extern .global .b8 array_i8[10];
+@array_i8 = external global [10 x i8]
+
+;CHECK: .extern .const .b8 array_constant_i8[10];
+@array_constant_i8 = external addrspace(1) constant [10 x i8]
+
+;CHECK: .extern .local .b8 array_local_i8[10];
+@array_local_i8 = external addrspace(2) global [10 x i8]
+
+;CHECK: .extern .shared .b8 array_shared_i8[10];
+@array_shared_i8 = external addrspace(4) global [10 x i8]
+
 ;CHECK: .extern .global .b8 array_i16[20];
 @array_i16 = external global [10 x i16]
 
@@ -60,6 +72,13 @@
 ;CHECK: .extern .shared .b8 array_shared_double[80];
 @array_shared_double = external addrspace(4) global [10 x double]
 
+define ptx_device void @t1_u8(i8* %p, i8 %x) {
+entry:
+;CHECK: st.global.u8 [r{{[0-9]+}}], rq{{[0-9]+}};
+;CHECK-NEXT: ret;
+  store i8 %x, i8* %p
+  ret void
+}
 
 define ptx_device void @t1_u16(i16* %p, i16 %x) {
 entry:
@@ -101,6 +120,15 @@
   ret void
 }
 
+define ptx_device void @t2_u8(i8* %p, i8 %x) {
+entry:
+;CHECK: st.global.u8 [r{{[0-9]+}}+1], rq{{[0-9]+}};
+;CHECK-NEXT: ret;
+  %i = getelementptr i8* %p, i32 1
+  store i8 %x, i8* %i
+  ret void
+}
+
 define ptx_device void @t2_u16(i16* %p, i16 %x) {
 entry:
 ;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}};
@@ -146,6 +174,16 @@
   ret void
 }
 
+define ptx_device void @t3_u8(i8* %p, i32 %q, i8 %x) {
+entry:
+;CHECK: add.u32 r[[R0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+;CHECK-NEXT: st.global.u8 [r[[R0]]], rq{{[0-9]+}};
+;CHECK-NEXT: ret;
+  %i = getelementptr i8* %p, i32 %q
+  store i8 %x, i8* %i
+  ret void
+}
+
 define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
 entry:
 ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
@@ -201,6 +239,16 @@
   ret void
 }
 
+define ptx_device void @t4_global_u8(i8 %x) {
+entry:
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
+;CHECK-NEXT: st.global.u8 [r[[R0]]], rq{{[0-9]+}};
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i8]* @array_i8, i8 0, i8 0
+  store i8 %x, i8* %i
+  ret void
+}
+
 define ptx_device void @t4_global_u16(i16 %x) {
 entry:
 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
@@ -251,6 +299,16 @@
   ret void
 }
 
+define ptx_device void @t4_local_u8(i8 %x) {
+entry:
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i8;
+;CHECK-NEXT: st.local.u8 [r[[R0]]], rq{{[0-9]+}};
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i8] addrspace(2)* @array_local_i8, i32 0, i32 0
+  store i8 %x, i8 addrspace(2)* %i
+  ret void
+}
+
 define ptx_device void @t4_local_u16(i16 %x) {
 entry:
 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
@@ -301,6 +359,16 @@
   ret void
 }
 
+define ptx_device void @t4_shared_u8(i8 %x) {
+entry:
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i8;
+;CHECK-NEXT: st.shared.u8 [r[[R0]]], rq{{[0-9]+}};
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i8] addrspace(4)* @array_shared_i8, i32 0, i32 0
+  store i8 %x, i8 addrspace(4)* %i
+  ret void
+}
+
 define ptx_device void @t4_shared_u16(i16 %x) {
 entry:
 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
@@ -351,6 +419,16 @@
   ret void
 }
 
+define ptx_device void @t5_u8(i8 %x) {
+entry:
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8;
+;CHECK-NEXT: st.global.u8 [r[[R0]]+1], rq{{[0-9]+}};
+;CHECK-NEXT: ret;
+  %i = getelementptr [10 x i8]* @array_i8, i32 0, i32 1
+  store i8 %x, i8* %i
+  ret void
+}
+
 define ptx_device void @t5_u16(i16 %x) {
 entry:
 ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;





More information about the llvm-commits mailing list