[llvm] [NVPTX] fix emission for i1 load and extload (PR #99392)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 17 14:33:53 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Meredith Julian (mjulian31)
<details>
<summary>Changes</summary>
Currently, an illegal 2-byte load from a 1-byte global variable is being generated. This change instead generates a 1-byte load and zero-extends the result into an i16 register. This was always the intended behavior of the function (`LowerLOADi1`).
In addition, an i1 ext load of any kind needs to be promoted. A missing setLoadExtAction for ISD::EXTLOAD was causing an "Unhandled source type" unreachable due to an illegal i1 ext load during ISelDAGtoDAG (see the bug linked below).
Bug https://github.com/llvm/llvm-project/issues/98033.
LIT tests run locally:
| LIT Tests | |
| ------------- | ------ |
| Testing Time | 94.93s |
| Total Discovered Tests | 60487 |
| Skipped | 478 (0.79%) |
| Unsupported | 28409 (46.97%) |
| Passed | 31528 (52.12%) |
| Expectedly Failed | 72 (0.12%) |
---
Full diff: https://github.com/llvm/llvm-project/pull/99392.diff
3 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+5-3)
- (added) llvm/test/CodeGen/NVPTX/i1-ext-load.ll (+34)
- (added) llvm/test/CodeGen/NVPTX/i1-load-lower.ll (+31)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index a2181b478c269..bc23998455a68 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -629,6 +629,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
setTruncStoreAction(VT, MVT::i1, Expand);
}
@@ -2920,9 +2921,10 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
assert(Node->getValueType(0) == MVT::i1 &&
"Custom lowering for i1 load only");
- SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(), LD->getAlign(),
- LD->getMemOperand()->getFlags());
+ SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(),
+ MVT::i8, LD->getAlign(),
+ LD->getMemOperand()->getFlags());
SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
// The legalizer (the caller) is expecting two values from the legalized
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
new file mode 100644
index 0000000000000..b775e40470047
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function foo --extra_scrub --default-march nvptx64 --filter-out ".*//.*" --filter-out "[\{\}\(\)]" --version 5
+
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_50 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+define void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
+; CHECK-LABEL: foo(
+; CHECK: .reg .b16 %rs<2>;
+; CHECK: .reg .b32 %r<4>;
+; CHECK: .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK: ld.param.u64 %rd1, [foo_param_0];
+; CHECK: ld.param.u64 %rd2, [foo_param_1];
+; CHECK: cvta.to.global.u64 %rd3, %rd2;
+; CHECK: cvta.to.global.u64 %rd4, %rd1;
+; CHECK: ld.global.nc.u8 %rs1, [%rd4];
+; CHECK: cvt.u32.u8 %r1, %rs1;
+; CHECK: add.s32 %r2, %r1, 1;
+; CHECK: and.b32 %r3, %r2, 1;
+; CHECK: st.global.u32 [%rd3], %r3;
+; CHECK: ret;
+ %ld = load i1, ptr %ptr, align 1
+ %zext = zext i1 %ld to i32
+ %add = add i32 %zext, 1
+ %and = and i32 %add, 1
+ store i32 %and, ptr %retval
+ ret void
+}
+
+!nvvm.annotations = !{!0}
+
+!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/i1-load-lower.ll b/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
new file mode 100644
index 0000000000000..d1f99b5724de8
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function foo --extra_scrub --default-march nvptx64 --filter-out ".*//.*" --filter-out "[\(\)\{\}]" --version 5
+
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+@i1g = addrspace(1) global i1 false, align 2
+
+define void @foo() {
+; CHECK-LABEL: foo(
+; CHECK: .reg .pred %p<2>;
+; CHECK: .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK: ld.global.u8 %rs1, [i1g];
+; CHECK: and.b16 %rs2, %rs1, 1;
+; CHECK: setp.eq.b16 %p1, %rs2, 1;
+; CHECK: @%p1 bra $L__BB0_2;
+; CHECK: mov.u16 %rs3, 1;
+; CHECK: st.global.u8 [i1g], %rs3;
+; CHECK: ret;
+ %tmp = load i1, ptr addrspace(1) @i1g, align 2
+ br i1 %tmp, label %if.end, label %if.then
+
+if.then:
+ store i1 true, ptr addrspace(1) @i1g, align 2
+ br label %if.end
+
+if.end:
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/99392
More information about the llvm-commits
mailing list