[llvm] 4e80a03 - [NVPTX] Use prmt.f4e to lower pointer alignment fshr idiom (#143407)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 12 23:09:58 PDT 2025
Author: Alex MacLean
Date: 2025-06-12T23:09:55-07:00
New Revision: 4e80a033a1bade55bca8a32e267cf1b06d05b1ed
URL: https://github.com/llvm/llvm-project/commit/4e80a033a1bade55bca8a32e267cf1b06d05b1ed
DIFF: https://github.com/llvm/llvm-project/commit/4e80a033a1bade55bca8a32e267cf1b06d05b1ed.diff
LOG: [NVPTX] Use prmt.f4e to lower pointer alignment fshr idiom (#143407)
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/test/CodeGen/NVPTX/prmt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index fa521c040e8e5..4c3501df57f84 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1621,6 +1621,10 @@ let hasSideEffects = false in {
}
+// PRMT folding patterns
+def : Pat<(fshr i32:$hi, i32:$lo, (shl i32:$amt, (i32 3))),
+ (PRMT_B32rrr $lo, $hi, $amt, PrmtF4E)>;
+
// byte extraction + signed/unsigned extension to i32.
def : Pat<(i32 (sext_inreg (bfe i32:$s, i32:$o, 8), i8)),
diff --git a/llvm/test/CodeGen/NVPTX/prmt.ll b/llvm/test/CodeGen/NVPTX/prmt.ll
index 271e4c86cd23e..48b9eefb9fb30 100644
--- a/llvm/test/CodeGen/NVPTX/prmt.ll
+++ b/llvm/test/CodeGen/NVPTX/prmt.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -verify-machineinstrs | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -verify-machineinstrs | %ptxas-verify %}
+; RUN: llc < %s -verify-machineinstrs -mcpu=sm_50 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -verify-machineinstrs -mcpu=sm_50 | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"
@@ -111,3 +111,20 @@ define i32 @test_prmt_rc16(i32 %lo, i32 %selector) {
%val = call i32 @llvm.nvvm.prmt.rc16(i32 %lo, i32 %selector)
ret i32 %val
}
+
+define i32 @test_prmt_f4e_folding(i32 %lo, i32 %hi, i32 %ptr) {
+; CHECK-LABEL: test_prmt_f4e_folding(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_prmt_f4e_folding_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [test_prmt_f4e_folding_param_1];
+; CHECK-NEXT: ld.param.b32 %r3, [test_prmt_f4e_folding_param_2];
+; CHECK-NEXT: prmt.b32.f4e %r4, %r1, %r2, %r3;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
+; CHECK-NEXT: ret;
+ %sh_amt = shl i32 %ptr, 3
+ %val = call i32 @llvm.fshr.i32(i32 %hi, i32 %lo, i32 %sh_amt)
+ ret i32 %val
+}
More information about the llvm-commits mailing list