[llvm] b2dcf62 - [NVPTX] fix emission for i1 load and extload (#99392)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 18 10:29:29 PDT 2024
Author: Meredith Julian
Date: 2024-07-18T10:29:24-07:00
New Revision: b2dcf62c514d3c9c143c85bd029d22098b92c38d
URL: https://github.com/llvm/llvm-project/commit/b2dcf62c514d3c9c143c85bd029d22098b92c38d
DIFF: https://github.com/llvm/llvm-project/commit/b2dcf62c514d3c9c143c85bd029d22098b92c38d.diff
LOG: [NVPTX] fix emission for i1 load and extload (#99392)
Currently, an illegal 2-byte load from a 1-byte global variable is being
generated. This change instead generates a 1-byte load and zero-extends
it into an i16 register. This was always the intended behavior of
LowerLOADi1.
In addition, an i1 ext load of any kind needs to be promoted. A missing
setLoadExtAction for ISD::EXTLOAD was causing an "Unhandled source type"
unreachable to be hit, because an illegal i1 ext load reached ISelDAGToDAG
(see the bug linked below).
Bug https://github.com/llvm/llvm-project/issues/98033.
Added:
llvm/test/CodeGen/NVPTX/i1-ext-load.ll
llvm/test/CodeGen/NVPTX/i1-load-lower.ll
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index a2181b478c269..bc23998455a68 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -629,6 +629,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
setTruncStoreAction(VT, MVT::i1, Expand);
}
@@ -2920,9 +2921,10 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
assert(Node->getValueType(0) == MVT::i1 &&
"Custom lowering for i1 load only");
- SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(), LD->getAlign(),
- LD->getMemOperand()->getFlags());
+ SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(),
+ MVT::i8, LD->getAlign(),
+ LD->getMemOperand()->getFlags());
SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
// The legalizer (the caller) is expecting two values from the legalized
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
new file mode 100644
index 0000000000000..b775e40470047
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function foo --extra_scrub --default-march nvptx64 --filter-out ".*//.*" --filter-out "[\{\}\(\)]" --version 5
+
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_50 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+define void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
+; CHECK-LABEL: foo(
+; CHECK: .reg .b16 %rs<2>;
+; CHECK: .reg .b32 %r<4>;
+; CHECK: .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK: ld.param.u64 %rd1, [foo_param_0];
+; CHECK: ld.param.u64 %rd2, [foo_param_1];
+; CHECK: cvta.to.global.u64 %rd3, %rd2;
+; CHECK: cvta.to.global.u64 %rd4, %rd1;
+; CHECK: ld.global.nc.u8 %rs1, [%rd4];
+; CHECK: cvt.u32.u8 %r1, %rs1;
+; CHECK: add.s32 %r2, %r1, 1;
+; CHECK: and.b32 %r3, %r2, 1;
+; CHECK: st.global.u32 [%rd3], %r3;
+; CHECK: ret;
+ %ld = load i1, ptr %ptr, align 1
+ %zext = zext i1 %ld to i32
+ %add = add i32 %zext, 1
+ %and = and i32 %add, 1
+ store i32 %and, ptr %retval
+ ret void
+}
+
+!nvvm.annotations = !{!0}
+
+!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/i1-load-lower.ll b/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
new file mode 100644
index 0000000000000..d1f99b5724de8
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function foo --extra_scrub --default-march nvptx64 --filter-out ".*//.*" --filter-out "[\(\)\{\}]" --version 5
+
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+@i1g = addrspace(1) global i1 false, align 2
+
+define void @foo() {
+; CHECK-LABEL: foo(
+; CHECK: .reg .pred %p<2>;
+; CHECK: .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK: ld.global.u8 %rs1, [i1g];
+; CHECK: and.b16 %rs2, %rs1, 1;
+; CHECK: setp.eq.b16 %p1, %rs2, 1;
+; CHECK: @%p1 bra $L__BB0_2;
+; CHECK: mov.u16 %rs3, 1;
+; CHECK: st.global.u8 [i1g], %rs3;
+; CHECK: ret;
+ %tmp = load i1, ptr addrspace(1) @i1g, align 2
+ br i1 %tmp, label %if.end, label %if.then
+
+if.then:
+ store i1 true, ptr addrspace(1) @i1g, align 2
+ br label %if.end
+
+if.end:
+ ret void
+}
More information about the llvm-commits
mailing list