[llvm] [NVPTX] Auto-Upgrade llvm.nvvm.swap.lo.hi.b64 to llvm.fshl (PR #132098)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 13:54:36 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

<details>
<summary>Changes</summary>

After 3c8c2914e067e132af951f70d2b3577fe049e19a the lowering of 64-bit funnel shifts has been improved to the point where this intrinsic is no longer needed.

---
Full diff: https://github.com/llvm/llvm-project/pull/132098.diff


4 Files Affected:

- (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+1-6) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+7-1) 
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (-5) 
- (modified) llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll (+3) 


``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 665db3025903e..80e10f33b770d 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -45,6 +45,7 @@
 //   * llvm.nvvm.ldg.global.i        --> load addrspace(1) !load.invariant
 //   * llvm.nvvm.ldg.global.f        --> ibid.
 //   * llvm.nvvm.ldg.global.p        --> ibid.
+//   * llvm.nvvm.swap.lo.hi.b64      --> llvm.fshl(x, x, 32)
 
 def llvm_global_ptr_ty  : LLVMQualPointerType<1>;  // (global)ptr
 def llvm_shared_ptr_ty  : LLVMQualPointerType<3>;  // (shared)ptr
@@ -4635,12 +4636,6 @@ def int_nvvm_sust_p_3d_v4i32_trap
               "llvm.nvvm.sust.p.3d.v4i32.trap">,
     ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
 
-def int_nvvm_swap_lo_hi_b64
-  : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
-              [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
-              ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
-
-
 // Accessing special registers.
 
 class PTXReadSRegIntrinsicNB_r32
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7a194219c5cd4..9be307bb071ed 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1292,7 +1292,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
       if (Name.consume_front("abs."))
         // nvvm.abs.{i,ii}
         Expand = Name == "i" || Name == "ll";
-      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
+      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ||
+               Name == "swap.lo.hi.b64")
         Expand = true;
       else if (Name.consume_front("max.") || Name.consume_front("min."))
         // nvvm.{min,max}.{i,ii,ui,ull}
@@ -2370,6 +2371,11 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
     Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
     Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                   {Arg, Arg, ZExtShiftAmt});
+  } else if (Name == "swap.lo.hi.b64") {
+    Type *Int64Ty = Builder.getInt64Ty();
+    Value *Arg = CI->getOperand(0);
+    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
+                                  {Arg, Arg, Builder.getInt64(32)});
   } else if ((Name.consume_front("ptr.gen.to.") &&
               (Name.starts_with("local") || Name.starts_with("shared") ||
                Name.starts_with("global") || Name.starts_with("constant"))) ||
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 90f56a421b19b..b2e05a567b4fe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2947,11 +2947,6 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
 def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
 def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
 
-
-def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
-          (V2I32toI64 (I64toI32H $src),
-                      (I64toI32L $src))> ;
-
 //-----------------------------------
 // Texture Intrinsics
 //-----------------------------------
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 5cc3a30277459..588e79a7428a4 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -34,6 +34,7 @@ declare double @llvm.nvvm.bitcast.ll2d(i64)
 declare i32 @llvm.nvvm.rotate.b32(i32, i32)
 declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
 declare i64 @llvm.nvvm.rotate.b64(i64, i32)
+declare i64 @llvm.nvvm.swap.lo.hi.b64(i64)
 
 declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
 declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr)
@@ -166,10 +167,12 @@ define void @rotate(i32 %a, i64 %b) {
 ; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6)
 ; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7)
 ; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8)
+; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 32)
 ;
   %r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6)
   %r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7)
   %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8)
+  %r4 = call i64 @llvm.nvvm.swap.lo.hi.b64(i64 %b)
   ret void
 }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/132098


More information about the llvm-commits mailing list