[llvm] r265089 - [NVPTX] Annotate some instructions as hasSideEffects = 0.

Thu Mar 31 18:09:05 PDT 2016

Author: jlebar
Date: Thu Mar 31 20:09:05 2016
New Revision: 265089

URL: http://llvm.org/viewvc/llvm-project?rev=265089&view=rev
Log:
[NVPTX] Annotate some instructions as hasSideEffects = 0.

Summary:
TableGen tries to infer this from the selection DAG patterns defined for
the instructions, but it cannot always do so.

An instructive example is CLZr64.  CLZr32 is correctly inferred to have
no side effects, but the selection DAG pattern for CLZr64 is slightly
more complicated; in particular, the ctlz DAG node is not at the root
of the pattern.  Thus TableGen can't infer that CLZr64 has no
side effects.

Reviewers: jholewinski

Subscribers: jholewinski, tra, llvm-commits

Differential Revision: http://reviews.llvm.org/D17472

Modified:
    llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
    llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td?rev=265089&r1=265088&r2=265089&view=diff
==============================================================================

--- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td Thu Mar 31 20:09:05 2016
@@ -14,7 +14,9 @@
 include "NVPTXInstrFormats.td"
 
 // A NOP instruction
-def NOP : NVPTXInst<(outs), (ins), "", []>;
+let hasSideEffects = 0 in {
+  def NOP : NVPTXInst<(outs), (ins), "", []>;
+}
 
 // List of vector specific properties
 def isVecLD      : VecInstTypeEnum<1>;
@@ -1227,10 +1229,12 @@ multiclass BFE<string TyStr, RegisterCla
                 !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
 }
 
-defm BFE_S32 : BFE<"s32", Int32Regs>;
-defm BFE_U32 : BFE<"u32", Int32Regs>;
-defm BFE_S64 : BFE<"s64", Int64Regs>;
-defm BFE_U64 : BFE<"u64", Int64Regs>;
+let hasSideEffects = 0 in {
+  defm BFE_S32 : BFE<"s32", Int32Regs>;
+  defm BFE_U32 : BFE<"u32", Int32Regs>;
+  defm BFE_S64 : BFE<"s64", Int64Regs>;
+  defm BFE_U64 : BFE<"u64", Int64Regs>;
+}
 
 //-----------------------------------
 // Comparison instructions (setp, set)
@@ -1239,19 +1243,21 @@ defm BFE_U64 : BFE<"u64", Int64Regs>;
 // FIXME: This doesn't cover versions of set and setp that combine with a
 // boolean predicate, e.g. setp.eq.and.b16.
 
-multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
-  def rr :
-    NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
-              !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
-                         "\t$dst, $a, $b;"), []>;
-  def ri :
-    NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
-              !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
-                         "\t$dst, $a, $b;"), []>;
-  def ir :
-    NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
-              !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
-                         "\t$dst, $a, $b;"), []>;
+let hasSideEffects = 0 in {
+  multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
+    def rr :
+      NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
+                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
+                           "\t$dst, $a, $b;"), []>;
+    def ri :
+      NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
+                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
+                           "\t$dst, $a, $b;"), []>;
+    def ir :
+      NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
+                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
+                           "\t$dst, $a, $b;"), []>;
+  }
 }
 
 defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
@@ -1270,16 +1276,18 @@ defm SETP_f64 : SETP<"f64", Float64Regs,
 // "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
 // reg, either u32, s32, or f32.  Anyway these aren't used at the moment.
 
-multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
-  def rr : NVPTXInst<(outs Int32Regs:$dst),
-                     (ins RC:$a, RC:$b, CmpMode:$cmp),
-                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
-  def ri : NVPTXInst<(outs Int32Regs:$dst),
-                     (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
-                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
-  def ir : NVPTXInst<(outs Int32Regs:$dst),
-                     (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
-                     !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+let hasSideEffects = 0 in {
+  multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
+    def rr : NVPTXInst<(outs Int32Regs:$dst),
+                       (ins RC:$a, RC:$b, CmpMode:$cmp),
+                       !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+    def ri : NVPTXInst<(outs Int32Regs:$dst),
+                       (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
+                       !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+    def ir : NVPTXInst<(outs Int32Regs:$dst),
+                       (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
+                       !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+  }
 }
 
 defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
@@ -1302,43 +1310,45 @@ defm SET_f64 : SET<"f64", Float64Regs, f
 
 // selp instructions that don't have any pattern matches; we explicitly use
 // them within this file.
-multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
-  def rr : NVPTXInst<(outs RC:$dst),
-                     (ins RC:$a, RC:$b, Int1Regs:$p),
-                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
-  def ri : NVPTXInst<(outs RC:$dst),
-                     (ins RC:$a, ImmCls:$b, Int1Regs:$p),
-                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
-  def ir : NVPTXInst<(outs RC:$dst),
-                     (ins ImmCls:$a, RC:$b, Int1Regs:$p),
-                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
-  def ii : NVPTXInst<(outs RC:$dst),
-                     (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
-                     !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
-}
-
-multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
-                        SDNode ImmNode> {
-  def rr :
-    NVPTXInst<(outs RC:$dst),
-              (ins RC:$a, RC:$b, Int1Regs:$p),
-              !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
-              [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
-  def ri :
-    NVPTXInst<(outs RC:$dst),
-              (ins RC:$a, ImmCls:$b, Int1Regs:$p),
-              !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
-              [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
-  def ir :
-    NVPTXInst<(outs RC:$dst),
-              (ins ImmCls:$a, RC:$b, Int1Regs:$p),
-              !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
-              [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
-  def ii :
-    NVPTXInst<(outs RC:$dst),
-              (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
-              !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
-              [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
+let hasSideEffects = 0 in {
+  multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
+    def rr : NVPTXInst<(outs RC:$dst),
+                       (ins RC:$a, RC:$b, Int1Regs:$p),
+                       !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+    def ri : NVPTXInst<(outs RC:$dst),
+                       (ins RC:$a, ImmCls:$b, Int1Regs:$p),
+                       !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+    def ir : NVPTXInst<(outs RC:$dst),
+                       (ins ImmCls:$a, RC:$b, Int1Regs:$p),
+                       !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+    def ii : NVPTXInst<(outs RC:$dst),
+                       (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
+                       !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+  }
+
+  multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
+                          SDNode ImmNode> {
+    def rr :
+      NVPTXInst<(outs RC:$dst),
+                (ins RC:$a, RC:$b, Int1Regs:$p),
+                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+                [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
+    def ri :
+      NVPTXInst<(outs RC:$dst),
+                (ins RC:$a, ImmCls:$b, Int1Regs:$p),
+                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+                [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
+    def ir :
+      NVPTXInst<(outs RC:$dst),
+                (ins ImmCls:$a, RC:$b, Int1Regs:$p),
+                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+                [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
+    def ii :
+      NVPTXInst<(outs RC:$dst),
+                (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
+                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+                [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
+  }
 }
 
 // Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as
@@ -1397,14 +1407,16 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Re
                            [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
 
 // Get pointer to local stack.
-def MOV_DEPOT_ADDR :    NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
-                                   "mov.u32 \t$d, __local_depot$num;", []>;
-def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
-                                  "mov.u64 \t$d, __local_depot$num;", []>;
+let hasSideEffects = 0 in {
+  def MOV_DEPOT_ADDR :    NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
+                                     "mov.u32 \t$d, __local_depot$num;", []>;
+  def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
+                                    "mov.u64 \t$d, __local_depot$num;", []>;
+}
 
 
 // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
-let IsSimpleMove=1 in {
+let IsSimpleMove=1, hasSideEffects=0 in {
   def IMOV1rr :  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
                            "mov.pred \t$dst, $sss;", []>;
   def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
@@ -2512,41 +2524,45 @@ def : Pat<(select Int32Regs:$pred, Float
           (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
 
 
-// pack a set of smaller int registers to a larger int register
-def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
-                           (ins Int16Regs:$s1, Int16Regs:$s2,
-                                Int16Regs:$s3, Int16Regs:$s4),
-                           "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>;
-def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
-                           (ins Int16Regs:$s1, Int16Regs:$s2),
-                           "mov.b32\t$d, {{$s1, $s2}};", []>;
-def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
-                           (ins Int32Regs:$s1, Int32Regs:$s2),
-                           "mov.b64\t$d, {{$s1, $s2}};", []>;
-def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
-                           (ins Float32Regs:$s1, Float32Regs:$s2),
-                           "mov.b64\t$d, {{$s1, $s2}};", []>;
-
-// unpack a larger int register to a set of smaller int registers
-def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
-                                 Int16Regs:$d3, Int16Regs:$d4),
-                           (ins Int64Regs:$s),
-                           "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>;
-def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
-                           (ins Int32Regs:$s),
-                           "mov.b32\t{{$d1, $d2}}, $s;", []>;
-def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
-                           (ins Int64Regs:$s),
-                           "mov.b64\t{{$d1, $d2}}, $s;", []>;
-def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
-                           (ins Float64Regs:$s),
-                           "mov.b64\t{{$d1, $d2}}, $s;", []>;
+let hasSideEffects = 0 in {
+  // pack a set of smaller int registers to a larger int register
+  def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
+                             (ins Int16Regs:$s1, Int16Regs:$s2,
+                                  Int16Regs:$s3, Int16Regs:$s4),
+                             "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>;
+  def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
+                             (ins Int16Regs:$s1, Int16Regs:$s2),
+                             "mov.b32\t$d, {{$s1, $s2}};", []>;
+  def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
+                             (ins Int32Regs:$s1, Int32Regs:$s2),
+                             "mov.b64\t$d, {{$s1, $s2}};", []>;
+  def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
+                             (ins Float32Regs:$s1, Float32Regs:$s2),
+                             "mov.b64\t$d, {{$s1, $s2}};", []>;
+
+  // unpack a larger int register to a set of smaller int registers
+  def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
+                                   Int16Regs:$d3, Int16Regs:$d4),
+                             (ins Int64Regs:$s),
+                             "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>;
+  def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
+                             (ins Int32Regs:$s),
+                             "mov.b32\t{{$d1, $d2}}, $s;", []>;
+  def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
+                             (ins Int64Regs:$s),
+                             "mov.b64\t{{$d1, $d2}}, $s;", []>;
+  def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
+                             (ins Float64Regs:$s),
+                             "mov.b64\t{{$d1, $d2}}, $s;", []>;
+}
 
 // Count leading zeros
-def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
-                       "clz.b32\t$d, $a;", []>;
-def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
-                       "clz.b64\t$d, $a;", []>;
+let hasSideEffects = 0 in {
+  def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+                         "clz.b32\t$d, $a;", []>;
+  def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+                         "clz.b64\t$d, $a;", []>;
+}
 
 // 32-bit has a direct PTX instruction
 def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
@@ -2572,10 +2588,12 @@ def : Pat<(ctlz_zero_undef Int16Regs:$a)
            CvtNONE), 16)>;
 
 // Population count
-def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
-                        "popc.b32\t$d, $a;", []>;
-def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
-                        "popc.b64\t$d, $a;", []>;
+let hasSideEffects = 0 in {
+  def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+                          "popc.b32\t$d, $a;", []>;
+  def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+                          "popc.b64\t$d, $a;", []>;
+}
 
 // 32-bit has a direct PTX instruction
 def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;

Modified: llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td?rev=265089&r1=265088&r2=265089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXIntrinsics.td Thu Mar 31 20:09:05 2016
@@ -1846,54 +1846,61 @@ def : Pat<(int_nvvm_rotate_b32 Int32Regs
           (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
       Requires<[noHWROT32]> ;
 
-def GET_LO_INT64
-  : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
-              !strconcat("{{\n\t",
-              !strconcat(".reg .b32 %dummy;\n\t",
-              !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
-        !strconcat("}}", "")))),
-        []> ;
-
-def GET_HI_INT64
-  : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
-              !strconcat("{{\n\t",
-              !strconcat(".reg .b32 %dummy;\n\t",
-              !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
-        !strconcat("}}", "")))),
-        []> ;
-
-def PACK_TWO_INT32
-  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
-              "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
+let hasSideEffects = 0 in {
+  def GET_LO_INT64
+    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+                !strconcat("{{\n\t",
+                !strconcat(".reg .b32 %dummy;\n\t",
+                !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
+          !strconcat("}}", "")))),
+          []> ;
+
+  def GET_HI_INT64
+    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+                !strconcat("{{\n\t",
+                !strconcat(".reg .b32 %dummy;\n\t",
+                !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
+          !strconcat("}}", "")))),
+          []> ;
+}
+
+let hasSideEffects = 0 in {
+  def PACK_TWO_INT32
+    : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
+                "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
+}
 
 def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
           (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
                           (GET_LO_INT64 Int64Regs:$src))> ;
 
-// funnel shift, requires >= sm_32
-def SHF_L_WRAP_B32_IMM
-  : NVPTXInst<(outs Int32Regs:$dst),
-              (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
-              "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
-    Requires<[hasHWROT32]>;
-
-def SHF_L_WRAP_B32_REG
-  : NVPTXInst<(outs Int32Regs:$dst),
-              (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
-              "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
-    Requires<[hasHWROT32]>;
-
-def SHF_R_WRAP_B32_IMM
-  : NVPTXInst<(outs Int32Regs:$dst),
-              (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
-              "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
-    Requires<[hasHWROT32]>;
-
-def SHF_R_WRAP_B32_REG
-  : NVPTXInst<(outs Int32Regs:$dst),
-              (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
-              "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
-    Requires<[hasHWROT32]>;
+// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
+// no side effects.
+let hasSideEffects = 0 in {
+  def SHF_L_WRAP_B32_IMM
+    : NVPTXInst<(outs Int32Regs:$dst),
+                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
+                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+      Requires<[hasHWROT32]>;
+
+  def SHF_L_WRAP_B32_REG
+    : NVPTXInst<(outs Int32Regs:$dst),
+                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+      Requires<[hasHWROT32]>;
+
+  def SHF_R_WRAP_B32_IMM
+    : NVPTXInst<(outs Int32Regs:$dst),
+                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
+                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+      Requires<[hasHWROT32]>;
+
+  def SHF_R_WRAP_B32_REG
+    : NVPTXInst<(outs Int32Regs:$dst),
+                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+      Requires<[hasHWROT32]>;
+}
 
 // HW version of rotate 64
 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),