[llvm] b083340 - [llvm][NVPTX] Don't reorder MIs that construct a PTX function call (#116522)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 18 14:12:23 PST 2024
Author: Youngsuk Kim
Date: 2024-11-18T17:12:19-05:00
New Revision: b083340cb663b6bd785dbd5864e5afd950745e35
URL: https://github.com/llvm/llvm-project/commit/b083340cb663b6bd785dbd5864e5afd950745e35
DIFF: https://github.com/llvm/llvm-project/commit/b083340cb663b6bd785dbd5864e5afd950745e35.diff
LOG: [llvm][NVPTX] Don't reorder MIs that construct a PTX function call (#116522)
With "-enable-misched", the MachineScheduler can reorder MIs that must stay
together, in their original order, to produce legal PTX code for a
function call.
Before this revision, generating PTX code for the attached test produced the
following invalid output, in which unrelated arithmetic instructions are scheduled into the middle of the `call.uni` statement:
```
{ // callseq 0, 0
.param .b64 param0;
st.param.f64 [param0], 0d0000000000000000;
.param .b64 retval0;
call.uni (retval0),
mul.lo.s32 %r7, %r10, %r3;
or.b32 %r8, %r4, %r7;
mul.lo.s32 %r9, %r2, %r8;
cvt.rn.f64.s32 %fd3, %r9;
quux,
(
param0
);
ld.param.f64 %fd1, [retval0];
} // callseq 0
```
Added:
llvm/test/CodeGen/NVPTX/misched_func_call.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 4661c059d5f783..b4dbe6a0930caa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -199,3 +199,23 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
}
+
+bool NVPTXInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Report each MachineInstr that forms part of a PTX function call sequence
+ // as a scheduling boundary, so the scheduler cannot reorder these MIs or split
+ // them apart; any other opcode defers to the TargetInstrInfo implementation.
+ switch (MI.getOpcode()) {
+ case NVPTX::CallUniPrintCallRetInst1:
+ case NVPTX::CallArgBeginInst:
+ case NVPTX::CallArgI32imm:
+ case NVPTX::CallArgParam:
+ case NVPTX::LastCallArgI32imm:
+ case NVPTX::LastCallArgParam:
+ case NVPTX::CallArgEndInst1:
+ return true;
+ }
+
+ return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index f674a00bc351bf..a1d9f017120188 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -67,6 +67,9 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
+ bool isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
};
} // namespace llvm
diff --git a/llvm/test/CodeGen/NVPTX/misched_func_call.ll b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
new file mode 100644
index 00000000000000..e036753ce90306
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O3 -march=nvptx64 -enable-misched %s -o - | FileCheck %s
+
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
+; CHECK-LABEL: wombat(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<11>;
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-NEXT: .reg .f64 %fd<6>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0: // %bb
+; CHECK-NEXT: ld.param.u32 %r4, [wombat_param_2];
+; CHECK-NEXT: ld.param.u32 %r3, [wombat_param_1];
+; CHECK-NEXT: ld.param.u32 %r2, [wombat_param_0];
+; CHECK-NEXT: mov.b32 %r10, 0;
+; CHECK-NEXT: mov.u64 %rd1, 0;
+; CHECK-NEXT: mov.b32 %r6, 1;
+; CHECK-NEXT: $L__BB0_1: // %bb3
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: st.param.f64 [param0], 0d0000000000000000;
+; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: call.uni (retval0),
+; CHECK-NEXT: quux,
+; CHECK-NEXT: (
+; CHECK-NEXT: param0
+; CHECK-NEXT: );
+; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3;
+; CHECK-NEXT: or.b32 %r8, %r4, %r7;
+; CHECK-NEXT: mul.lo.s32 %r9, %r2, %r8;
+; CHECK-NEXT: cvt.rn.f64.s32 %fd3, %r9;
+; CHECK-NEXT: ld.param.f64 %fd1, [retval0];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: cvt.rn.f64.u32 %fd4, %r10;
+; CHECK-NEXT: add.rn.f64 %fd5, %fd4, %fd3;
+; CHECK-NEXT: st.global.f64 [%rd1], %fd5;
+; CHECK-NEXT: mov.u32 %r10, %r6;
+; CHECK-NEXT: bra.uni $L__BB0_1;
+bb:
+ br label %bb3
+
+bb3: ; preds = %bb3, %bb
+ %phi = phi i32 [ 0, %bb ], [ 1, %bb3 ]
+ %call = tail call double @quux(double 0.000000e+00)
+ %mul = mul i32 %phi, %arg1
+ %or = or i32 %arg2, %mul
+ %mul4 = mul i32 %arg, %or
+ %sitofp = sitofp i32 %mul4 to double
+ %uitofp = uitofp i32 %phi to double
+ %fadd = fadd double %uitofp, %sitofp
+ store double %fadd, ptr addrspace(1) null, align 8
+ br label %bb3
+}
+
+declare double @quux(double)
More information about the llvm-commits
mailing list