[llvm] [NVPTX] Auto-Upgrade llvm.nvvm.swap.lo.hi.b64 to llvm.fshl (PR #132098)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 13:54:36 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
After commit 3c8c2914e067e132af951f70d2b3577fe049e19a, the lowering of 64-bit funnel shifts has been improved to the point where this intrinsic is no longer needed.
---
Full diff: https://github.com/llvm/llvm-project/pull/132098.diff
4 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+1-6)
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+7-1)
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (-5)
- (modified) llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll (+3)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 665db3025903e..80e10f33b770d 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -45,6 +45,7 @@
// * llvm.nvvm.ldg.global.i --> load addrspace(1) !load.invariant
// * llvm.nvvm.ldg.global.f --> ibid.
// * llvm.nvvm.ldg.global.p --> ibid.
+// * llvm.nvvm.swap.lo.hi.b64 --> llvm.fshl(x, x, 32)
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
@@ -4635,12 +4636,6 @@ def int_nvvm_sust_p_3d_v4i32_trap
"llvm.nvvm.sust.p.3d.v4i32.trap">,
ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
-def int_nvvm_swap_lo_hi_b64
- : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
- [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
- ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
-
-
// Accessing special registers.
class PTXReadSRegIntrinsicNB_r32
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7a194219c5cd4..9be307bb071ed 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1292,7 +1292,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
if (Name.consume_front("abs."))
// nvvm.abs.{i,ii}
Expand = Name == "i" || Name == "ll";
- else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
+ else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f" ||
+ Name == "swap.lo.hi.b64")
Expand = true;
else if (Name.consume_front("max.") || Name.consume_front("min."))
// nvvm.{min,max}.{i,ii,ui,ull}
@@ -2370,6 +2371,11 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
{Arg, Arg, ZExtShiftAmt});
+ } else if (Name == "swap.lo.hi.b64") {
+ Type *Int64Ty = Builder.getInt64Ty();
+ Value *Arg = CI->getOperand(0);
+ Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
+ {Arg, Arg, Builder.getInt64(32)});
} else if ((Name.consume_front("ptr.gen.to.") &&
(Name.starts_with("local") || Name.starts_with("shared") ||
Name.starts_with("global") || Name.starts_with("constant"))) ||
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 90f56a421b19b..b2e05a567b4fe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2947,11 +2947,6 @@ def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
-
-def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
- (V2I32toI64 (I64toI32H $src),
- (I64toI32L $src))> ;
-
//-----------------------------------
// Texture Intrinsics
//-----------------------------------
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 5cc3a30277459..588e79a7428a4 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -34,6 +34,7 @@ declare double @llvm.nvvm.bitcast.ll2d(i64)
declare i32 @llvm.nvvm.rotate.b32(i32, i32)
declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
declare i64 @llvm.nvvm.rotate.b64(i64, i32)
+declare i64 @llvm.nvvm.swap.lo.hi.b64(i64)
declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr)
@@ -166,10 +167,12 @@ define void @rotate(i32 %a, i64 %b) {
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6)
; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7)
; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8)
+; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 32)
;
%r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6)
%r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7)
%r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8)
+ %r4 = call i64 @llvm.nvvm.swap.lo.hi.b64(i64 %b)
ret void
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/132098
More information about the llvm-commits mailing list