[PATCH] D22428: [NVPTX] Force minimum alignment of 4 for byval arguments of device-side functions.
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 18 10:23:17 PDT 2016
tra updated this revision to Diff 64342.
tra added a comment.
updated comments.
https://reviews.llvm.org/D22428
Files:
lib/Target/NVPTX/NVPTXAsmPrinter.cpp
lib/Target/NVPTX/NVPTXISelLowering.cpp
test/CodeGen/NVPTX/param-align.ll
Index: test/CodeGen/NVPTX/param-align.ll
===================================================================
--- test/CodeGen/NVPTX/param-align.ll
+++ test/CodeGen/NVPTX/param-align.ll
@@ -23,3 +23,11 @@
; CHECK: .param .align 4 .b8 t3_param_0[8]
ret void
}
+
+;;; Need at least 4-byte alignment in order to avoid miscompilation by
+;;; ptxas for sm_50+
+define ptx_device void @t4(i8* byval %x) {
+; CHECK: .func t4
+; CHECK: .param .align 4 .b8 t4_param_0[1]
+ ret void
+}
Index: lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1072,6 +1072,7 @@
MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
auto &DL = MF.getDataLayout();
+ bool isKernel = llvm::isKernelFunction(*F);
SDValue tempChain = Chain;
Chain = DAG.getCALLSEQ_START(Chain,
@@ -1337,11 +1338,15 @@
// The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
- SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32), InFlag
- };
+
+ // Enforce minimum alignment of 4 to work around ptxas miscompile
+ // for sm_50+. See corresponding alignment adjustment in
+ // emitFunctionParamList() for details.
+ if (!isKernel && ArgAlign < 4)
+ ArgAlign = 4;
+ SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
Index: lib/Target/NVPTX/NVPTXAsmPrinter.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1589,7 +1589,19 @@
unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
align = DL.getABITypeAlignment(ETy);
-
+ // Work around a bug in ptxas. When PTX code takes the address of a
+ // byval parameter with alignment < 4, ptxas generates code to
+ // spill the argument into memory. Alas, on sm_50+ ptxas generates
+ // SASS code that fails with a misaligned access. To work around
+ // the problem, make sure that we align byval parameters by at
+ // least 4. A matching change must be made in LowerCall() where we
+ // prepare parameters for the call.
+ //
+ // TODO: this will need to be undone when we get to support multi-TU
+ // device-side compilation as it breaks ABI compatibility with nvcc.
+ // Hopefully the ptxas bug is fixed by then.
+ if (!isKernelFunc && align < 4)
+ align = 4;
unsigned sz = DL.getTypeAllocSize(ETy);
O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D22428.64342.patch
Type: text/x-patch
Size: 3219 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160718/f1060cdc/attachment.bin>
More information about the llvm-commits
mailing list