[llvm] [WebAssembly] Fix lowering of (extending) loads from addrspace(1) globals (PR #155937)
via llvm-commits
llvm-commits@lists.llvm.org
Tue Sep 9 12:10:08 PDT 2025
https://github.com/QuantumSegfault updated https://github.com/llvm/llvm-project/pull/155937
From d5aaf8398a652fcb36b15b51183b28f97a9a6842 Mon Sep 17 00:00:00 2001
From: QuantumSegfault <fungi-turbos-7l@icloud.com>
Date: Tue, 2 Sep 2025 20:55:35 -0700
Subject: [PATCH 1/2] Pre-commit tests for fixing (ext)loads from WASM globals
---
.../WebAssembly/lower-load-wasm-global.ll | 177 ++++++++++++++++++
1 file changed, 177 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll
diff --git a/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll b/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll
new file mode 100644
index 0000000000000..3da1ad9e36831
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+; Test that various loads from WASM (address space 1) globals lower as intended.
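+;
+; Note: these autogenerated CHECK lines capture the pre-fix lowering, which
+; emits linear-memory load instructions against the global's symbol; the
+; second patch in this PR updates them to global.get-based sequences.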
+
+target triple = "wasm32-unknown-unknown"
+
+
+@globalI8 = local_unnamed_addr addrspace(1) global i8 undef
+@globalI32 = local_unnamed_addr addrspace(1) global i32 undef
+@globalI64 = local_unnamed_addr addrspace(1) global i64 undef
+
+
+define i32 @zext_i8_i32() {
+; CHECK-LABEL: zext_i8_i32:
+; CHECK: .functype zext_i8_i32 () -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.load8_u globalI32
+; CHECK-NEXT: # fallthrough-return
+ %v = load i8, ptr addrspace(1) @globalI32
+ %e = zext i8 %v to i32
+ ret i32 %e
+}
+
+define i32 @sext_i8_i32() {
+; CHECK-LABEL: sext_i8_i32:
+; CHECK: .functype sext_i8_i32 () -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.load8_s globalI32
+; CHECK-NEXT: # fallthrough-return
+ %v = load i8, ptr addrspace(1) @globalI32
+ %e = sext i8 %v to i32
+ ret i32 %e
+}
+
+define i32 @zext_i16_i32() {
+; CHECK-LABEL: zext_i16_i32:
+; CHECK: .functype zext_i16_i32 () -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.load16_u globalI32
+; CHECK-NEXT: # fallthrough-return
+ %v = load i16, ptr addrspace(1) @globalI32
+ %e = zext i16 %v to i32
+ ret i32 %e
+}
+
+define i32 @sext_i16_i32() {
+; CHECK-LABEL: sext_i16_i32:
+; CHECK: .functype sext_i16_i32 () -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.load16_s globalI32
+; CHECK-NEXT: # fallthrough-return
+ %v = load i16, ptr addrspace(1) @globalI32
+ %e = sext i16 %v to i32
+ ret i32 %e
+}
+
+
+define i64 @zext_i8_i64() {
+; CHECK-LABEL: zext_i8_i64:
+; CHECK: .functype zext_i8_i64 () -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i64.load8_u globalI64
+; CHECK-NEXT: # fallthrough-return
+ %v = load i8, ptr addrspace(1) @globalI64
+ %e = zext i8 %v to i64
+ ret i64 %e
+}
+
+define i64 @sext_i8_i64() {
+; CHECK-LABEL: sext_i8_i64:
+; CHECK: .functype sext_i8_i64 () -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i64.load8_s globalI64
+; CHECK-NEXT: # fallthrough-return
+ %v = load i8, ptr addrspace(1) @globalI64
+ %e = sext i8 %v to i64
+ ret i64 %e
+}
+
+define i64 @zext_i16_i64() {
+; CHECK-LABEL: zext_i16_i64:
+; CHECK: .functype zext_i16_i64 () -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i64.load16_u globalI64
+; CHECK-NEXT: # fallthrough-return
+ %v = load i16, ptr addrspace(1) @globalI64
+ %e = zext i16 %v to i64
+ ret i64 %e
+}
+
+define i64 @sext_i16_i64() {
+; CHECK-LABEL: sext_i16_i64:
+; CHECK: .functype sext_i16_i64 () -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i64.load16_s globalI64
+; CHECK-NEXT: # fallthrough-return
+ %v = load i16, ptr addrspace(1) @globalI64
+ %e = sext i16 %v to i64
+ ret i64 %e
+}
+
+define i64 @zext_i32_i64() {
+; CHECK-LABEL: zext_i32_i64:
+; CHECK: .functype zext_i32_i64 () -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i64.load32_u globalI64
+; CHECK-NEXT: # fallthrough-return
+ %v = load i32, ptr addrspace(1) @globalI64
+ %e = zext i32 %v to i64
+ ret i64 %e
+}
+
+define i64 @sext_i32_i64() {
+; CHECK-LABEL: sext_i32_i64:
+; CHECK: .functype sext_i32_i64 () -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i64.load32_s globalI64
+; CHECK-NEXT: # fallthrough-return
+ %v = load i32, ptr addrspace(1) @globalI64
+ %e = sext i32 %v to i64
+ ret i64 %e
+}
+
+
+define i64 @load_i64_from_i32() {
+; CHECK-LABEL: load_i64_from_i32:
+; CHECK: .functype load_i64_from_i32 () -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: global.get globalI32
+; CHECK-NEXT: # fallthrough-return
+ %v = load i64, ptr addrspace(1) @globalI32
+ ret i64 %v
+}
+
+define i32 @load_i32_from_i64() {
+; CHECK-LABEL: load_i32_from_i64:
+; CHECK: .functype load_i32_from_i64 () -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: global.get globalI64
+; CHECK-NEXT: # fallthrough-return
+ %v = load i32, ptr addrspace(1) @globalI64
+ ret i32 %v
+}
+
+define i8 @load_i8() {
+; CHECK-LABEL: load_i8:
+; CHECK: .functype load_i8 () -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.load8_u globalI8
+; CHECK-NEXT: # fallthrough-return
+ %v = load i8, ptr addrspace(1) @globalI8
+ ret i8 %v
+}
+
+define i64 @load_i16_from_i8_zext_to_i64() {
+; CHECK-LABEL: load_i16_from_i8_zext_to_i64:
+; CHECK: .functype load_i16_from_i8_zext_to_i64 () -> (i64)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i64.load16_u globalI8
+; CHECK-NEXT: # fallthrough-return
+ %v = load i16, ptr addrspace(1) @globalI8
+ %e = zext i16 %v to i64
+ ret i64 %e
+}
From 584c5227aebfa633f6fc0d2e70a1baba93555d2f Mon Sep 17 00:00:00 2001
From: QuantumSegfault <fungi-turbos-7l@icloud.com>
Date: Thu, 28 Aug 2025 10:43:50 -0700
Subject: [PATCH 2/2] Fix lowering of loads (and extending loads) from
addrspace(1) globals
---
.../WebAssembly/WebAssemblyISelLowering.cpp | 308 +++++++++++++++++-
.../WebAssembly/WebAssemblyISelLowering.h | 3 +-
.../WebAssembly/lower-load-wasm-global.ll | 56 ++--
3 files changed, 337 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 5a45134692865..2124251650e70 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -18,6 +18,7 @@
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -91,6 +92,19 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::LOAD, T, Custom);
setOperationAction(ISD::STORE, T, Custom);
}
+
+ // Likewise, custom-lower zext/sext/anyext extending loads from address
+ // space 1 (WASM globals).
+ setLoadExtAction({ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}, MVT::i32,
+ {MVT::i8, MVT::i16}, Custom);
+ setLoadExtAction({ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}, MVT::i64,
+ {MVT::i8, MVT::i16, MVT::i32}, Custom);
+
+ // Compensate for the extending loads being Custom (and therefore no longer
+ // Legal) by reimplementing the relevant combiner logic below.
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+
if (Subtarget->hasSIMD128()) {
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
MVT::v2f64}) {
@@ -1707,6 +1721,11 @@ static bool IsWebAssemblyGlobal(SDValue Op) {
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
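+ // The address may also be a GlobalAddressSDNode already wrapped in a
+ // WebAssemblyISD::Wrapper node, so look through the wrapper as well.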
+ if (Op->getOpcode() == WebAssemblyISD::Wrapper)
+ if (const GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(Op->getOperand(0)))
+ return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
+
return false;
}
@@ -1764,16 +1783,115 @@ SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
const SDValue &Base = LN->getBasePtr();
const SDValue &Offset = LN->getOffset();
+ ISD::LoadExtType ExtType = LN->getExtensionType();
+ EVT ResultType = LN->getValueType(0);
if (IsWebAssemblyGlobal(Base)) {
if (!Offset->isUndef())
report_fatal_error(
"unexpected offset when loading from webassembly global", false);
- SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
- SDValue Ops[] = {LN->getChain(), Base};
- return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
- LN->getMemoryVT(), LN->getMemOperand());
+ if (!ResultType.isInteger() && !ResultType.isFloatingPoint()) {
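+ // Non-integer, non-FP results (e.g. vectors or reference types) are
+ // expected to match the global's wasm value type exactly, so a plain
+ // GLOBAL_GET suffices.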
+ SDVTList Tys = DAG.getVTList(ResultType, MVT::Other);
+ SDValue Ops[] = {LN->getChain(), Base};
+ SDValue GlobalGetNode =
+ DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
+ LN->getMemoryVT(), LN->getMemOperand());
+ return GlobalGetNode;
+ }
+
+ EVT GT = MVT::INVALID_SIMPLE_VALUE_TYPE;
+
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(
+ Base->getOpcode() == WebAssemblyISD::Wrapper ? Base->getOperand(0)
+ : Base))
+ GT = EVT::getEVT(GA->getGlobal()->getValueType());
+
+ if (GT != MVT::i8 && GT != MVT::i16 && GT != MVT::i32 && GT != MVT::i64 &&
+ GT != MVT::f32 && GT != MVT::f64)
+ report_fatal_error("encountered unexpected global type for Base when "
+ "loading from webassembly global",
+ false);
+
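+ // On wasm, getTypeToTransformTo promotes the sub-word types i8 and i16 to
+ // i32; a global always holds a full i32/i64/f32/f64 value.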
+ EVT PromotedGT = getTypeToTransformTo(*DAG.getContext(), GT);
+
+ if (ExtType == ISD::NON_EXTLOAD) {
+ // A normal, non-extending load may try to load more or less than the
+ // underlying global, which is invalid. We lower this to a load of the
+ // whole global (as its promoted type) and then truncate or extend the
+ // value as needed.
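+ // For example, an i64 load from an i32 global becomes a GLOBAL_GET of the
+ // i32 value plus an any-extend, and an i32 load from an i64 global becomes
+ // a GLOBAL_GET plus a truncate (see load_i64_from_i32 and
+ // load_i32_from_i64 in the test).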
+
+ // Modify the MMO to load the full global
+ MachineMemOperand *OldMMO = LN->getMemOperand();
+ MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
+ OldMMO->getPointerInfo(), OldMMO->getFlags(),
+ LLT(PromotedGT.getSimpleVT()), OldMMO->getBaseAlign(),
+ OldMMO->getAAInfo(), OldMMO->getRanges(), OldMMO->getSyncScopeID(),
+ OldMMO->getSuccessOrdering(), OldMMO->getFailureOrdering());
+
+ SDVTList Tys = DAG.getVTList(PromotedGT, MVT::Other);
+ SDValue Ops[] = {LN->getChain(), Base};
+ SDValue GlobalGetNode = DAG.getMemIntrinsicNode(
+ WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops, PromotedGT, NewMMO);
+
+ if (ResultType.bitsEq(PromotedGT)) {
+ return GlobalGetNode;
+ }
+
+ SDValue ValRes;
+ if (ResultType.isFloatingPoint())
+ ValRes = DAG.getFPExtendOrRound(GlobalGetNode, DL, ResultType);
+ else
+ ValRes = DAG.getAnyExtOrTrunc(GlobalGetNode, DL, ResultType);
+
+ return DAG.getMergeValues({ValRes, GlobalGetNode.getValue(1)}, DL);
+ }
+
+ if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::SEXTLOAD) {
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg, mirroring what the Expand action would do.
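+ // For example, a zext i8 load from an i32 global becomes an EXTLOAD of the
+ // whole global followed by an AND with 255 (see zext_i8_i32 in the test).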
+
+ SDValue Result =
+ DAG.getExtLoad(ISD::EXTLOAD, DL, ResultType, LN->getChain(), Base,
+ LN->getMemoryVT(), LN->getMemOperand());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Result.getValueType(),
+ Result, DAG.getValueType(LN->getMemoryVT()));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, DL, LN->getMemoryVT());
+
+ return DAG.getMergeValues({ValRes, Result.getValue(1)}, DL);
+ }
+
+ if (ExtType == ISD::EXTLOAD) {
+ // Expand the EXTLOAD into a regular LOAD of the global and, if
+ // needed, an any-extension to the result type.
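+ // For example, an extending i16 load from an i8 global becomes an i32 load
+ // of the global, any-extended to i64 when the result requires it (see
+ // load_i16_from_i8_zext_to_i64 in the test).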
+
+ EVT OldLoadType = LN->getMemoryVT();
+ EVT NewLoadType = getTypeToTransformTo(*DAG.getContext(), OldLoadType);
+
+ // Modify the MMO to load a whole WASM "register"'s worth
+ MachineMemOperand *OldMMO = LN->getMemOperand();
+ MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
+ OldMMO->getPointerInfo(), OldMMO->getFlags(),
+ LLT(NewLoadType.getSimpleVT()), OldMMO->getBaseAlign(),
+ OldMMO->getAAInfo(), OldMMO->getRanges(), OldMMO->getSyncScopeID(),
+ OldMMO->getSuccessOrdering(), OldMMO->getFailureOrdering());
+
+ SDValue Result =
+ DAG.getLoad(NewLoadType, DL, LN->getChain(), Base, NewMMO);
+
+ if (NewLoadType != ResultType) {
+ SDValue ValRes = DAG.getNode(ISD::ANY_EXTEND, DL, ResultType, Result);
+ return DAG.getMergeValues({ValRes, Result.getValue(1)}, DL);
+ }
+
+ return Result;
+ }
+
+ report_fatal_error(
+ "encountered unexpected ExtType when loading from webassembly global",
+ false);
}
if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
@@ -3637,6 +3755,184 @@ static SDValue performMulCombine(SDNode *N,
}
}
+static SDValue performANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Copied and modified from DAGCombiner::visitAND(SDNode *N).
+ // We have to do this because the generic combine bails out once ZEXTLOAD
+ // is marked Custom (and therefore no longer Legal) for the types involved.
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+
+ // fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // the 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD &&
+ N0.getOperand(0).getResNo() == 0) ||
+ (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
+ auto *Load =
+ cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getZero(1);
+ if (const ConstantSDNode *C = isConstOrConstSplat(
+ N1, /*AllowUndefs=*/false, /*AllowTruncation=*/true)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ // Endianness should not matter here. Code below makes sure that we only
+ // use the result if the SplatBitSize is a multiple of the vector element
+ // size. And after that we AND all element sized parts of the splat
+ // together. So the end result should be the same regardless of in which
+ // order we do those operations.
+ const bool IsBigEndian = false;
+ bool IsSplat =
+ Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, EltBitWidth, IsBigEndian);
+
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
+ if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ Constant = APInt::getAllOnes(EltBitWidth);
+ for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+ Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+
+ /*bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getValueType(0),
+ Load->getMemoryVT());*/
+ // MODIFIED: this is the one difference in the logic; we allow ZEXT combine
+ // only in addrspace 0, where it's legal
+ bool CanZextLoadProfitably = Load->getAddressSpace() == 0;
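+ // In address space 1, a ZEXTLOAD would just be custom-lowered back into an
+ // EXTLOAD plus an AND, so folding the AND into it gains nothing.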
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default:
+ B = false;
+ break;
+ case ISD::EXTLOAD:
+ B = CanZextLoadProfitably;
+ break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD:
+ B = true;
+ break;
+ }
+
+ if (B && Constant.isAllOnes()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+
+ // Fold the AND away. NewLoad may get replaced immediately.
+ DCI.CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DCI.DAG.getLoad(
+ Load->getAddressingMode(), ISD::ZEXTLOAD, Load->getValueType(0),
+ SDLoc(Load), Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = {NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2)};
+ DCI.CombineTo(Load, ArrayRef<SDValue>(To, 3), true);
+ } else {
+ DCI.CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ return SDValue();
+}
+
+static SDValue
+performSIGN_EXTEND_INREGCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Copied and modified from DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N).
+ // We have to do this because the generic combine bails out once SEXTLOAD
+ // is marked Custom (and therefore no longer Legal) for the types involved.
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = cast<VTSDNode>(N1)->getVT();
+ SDLoc DL(N);
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ // If sextload is not supported by target, we can only do the combine when
+ // load has one use. Doing otherwise can block folding the extload with other
+ // extends that the target does support.
+
+ // MODIFIED: replaced TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT) with
+ // cast<LoadSDNode>(N0)->getAddressSpace() == 0
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!DCI.isAfterLegalizeDAG() && cast<LoadSDNode>(N0)->isSimple() &&
+ N0.hasOneUse()) ||
+ cast<LoadSDNode>(N0)->getAddressSpace() == 0)) {
+ auto *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad =
+ DCI.DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
+ LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
+ DCI.CombineTo(N, ExtLoad);
+ DCI.CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ DCI.AddToWorklist(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+
+ // MODIFIED: replaced TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT) with
+ // cast<LoadSDNode>(N0)->getAddressSpace() == 0
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() && ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!DCI.isAfterLegalizeDAG() && cast<LoadSDNode>(N0)->isSimple()) ||
+ cast<LoadSDNode>(N0)->getAddressSpace() == 0)) {
+ auto *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad =
+ DCI.DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
+ LN0->getBasePtr(), ExtVT, LN0->getMemOperand());
+ DCI.CombineTo(N, ExtLoad);
+ DCI.CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -3672,5 +3968,9 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::MUL:
return performMulCombine(N, DCI);
+ case ISD::AND:
+ return performANDCombine(N, DCI);
+ case ISD::SIGN_EXTEND_INREG:
+ return performSIGN_EXTEND_INREGCombine(N, DCI);
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 72401a7a259c0..853f1338fb7ce 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -89,8 +89,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context,
- const Type *RetTy) const override;
+ LLVMContext &Context, const Type *RetTy) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
diff --git a/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll b/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll
index 3da1ad9e36831..0112296df1aa8 100644
--- a/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll
+++ b/llvm/test/CodeGen/WebAssembly/lower-load-wasm-global.ll
@@ -15,8 +15,9 @@ define i32 @zext_i8_i32() {
; CHECK-LABEL: zext_i8_i32:
; CHECK: .functype zext_i8_i32 () -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.load8_u globalI32
+; CHECK-NEXT: global.get globalI32
+; CHECK-NEXT: i32.const 255
+; CHECK-NEXT: i32.and
; CHECK-NEXT: # fallthrough-return
%v = load i8, ptr addrspace(1) @globalI32
%e = zext i8 %v to i32
@@ -27,8 +28,8 @@ define i32 @sext_i8_i32() {
; CHECK-LABEL: sext_i8_i32:
; CHECK: .functype sext_i8_i32 () -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.load8_s globalI32
+; CHECK-NEXT: global.get globalI32
+; CHECK-NEXT: i32.extend8_s
; CHECK-NEXT: # fallthrough-return
%v = load i8, ptr addrspace(1) @globalI32
%e = sext i8 %v to i32
@@ -39,8 +40,9 @@ define i32 @zext_i16_i32() {
; CHECK-LABEL: zext_i16_i32:
; CHECK: .functype zext_i16_i32 () -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.load16_u globalI32
+; CHECK-NEXT: global.get globalI32
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.and
; CHECK-NEXT: # fallthrough-return
%v = load i16, ptr addrspace(1) @globalI32
%e = zext i16 %v to i32
@@ -51,8 +53,8 @@ define i32 @sext_i16_i32() {
; CHECK-LABEL: sext_i16_i32:
; CHECK: .functype sext_i16_i32 () -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.load16_s globalI32
+; CHECK-NEXT: global.get globalI32
+; CHECK-NEXT: i32.extend16_s
; CHECK-NEXT: # fallthrough-return
%v = load i16, ptr addrspace(1) @globalI32
%e = sext i16 %v to i32
@@ -64,8 +66,9 @@ define i64 @zext_i8_i64() {
; CHECK-LABEL: zext_i8_i64:
; CHECK: .functype zext_i8_i64 () -> (i64)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i64.load8_u globalI64
+; CHECK-NEXT: global.get globalI64
+; CHECK-NEXT: i64.const 255
+; CHECK-NEXT: i64.and
; CHECK-NEXT: # fallthrough-return
%v = load i8, ptr addrspace(1) @globalI64
%e = zext i8 %v to i64
@@ -76,8 +79,8 @@ define i64 @sext_i8_i64() {
; CHECK-LABEL: sext_i8_i64:
; CHECK: .functype sext_i8_i64 () -> (i64)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i64.load8_s globalI64
+; CHECK-NEXT: global.get globalI64
+; CHECK-NEXT: i64.extend8_s
; CHECK-NEXT: # fallthrough-return
%v = load i8, ptr addrspace(1) @globalI64
%e = sext i8 %v to i64
@@ -88,8 +91,9 @@ define i64 @zext_i16_i64() {
; CHECK-LABEL: zext_i16_i64:
; CHECK: .functype zext_i16_i64 () -> (i64)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i64.load16_u globalI64
+; CHECK-NEXT: global.get globalI64
+; CHECK-NEXT: i64.const 65535
+; CHECK-NEXT: i64.and
; CHECK-NEXT: # fallthrough-return
%v = load i16, ptr addrspace(1) @globalI64
%e = zext i16 %v to i64
@@ -100,8 +104,8 @@ define i64 @sext_i16_i64() {
; CHECK-LABEL: sext_i16_i64:
; CHECK: .functype sext_i16_i64 () -> (i64)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i64.load16_s globalI64
+; CHECK-NEXT: global.get globalI64
+; CHECK-NEXT: i64.extend16_s
; CHECK-NEXT: # fallthrough-return
%v = load i16, ptr addrspace(1) @globalI64
%e = sext i16 %v to i64
@@ -112,8 +116,9 @@ define i64 @zext_i32_i64() {
; CHECK-LABEL: zext_i32_i64:
; CHECK: .functype zext_i32_i64 () -> (i64)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i64.load32_u globalI64
+; CHECK-NEXT: global.get globalI64
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.and
; CHECK-NEXT: # fallthrough-return
%v = load i32, ptr addrspace(1) @globalI64
%e = zext i32 %v to i64
@@ -124,8 +129,8 @@ define i64 @sext_i32_i64() {
; CHECK-LABEL: sext_i32_i64:
; CHECK: .functype sext_i32_i64 () -> (i64)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i64.load32_s globalI64
+; CHECK-NEXT: global.get globalI64
+; CHECK-NEXT: i64.extend32_s
; CHECK-NEXT: # fallthrough-return
%v = load i32, ptr addrspace(1) @globalI64
%e = sext i32 %v to i64
@@ -138,6 +143,7 @@ define i64 @load_i64_from_i32() {
; CHECK: .functype load_i64_from_i32 () -> (i64)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get globalI32
+; CHECK-NEXT: i64.extend_i32_u
; CHECK-NEXT: # fallthrough-return
%v = load i64, ptr addrspace(1) @globalI32
ret i64 %v
@@ -148,6 +154,7 @@ define i32 @load_i32_from_i64() {
; CHECK: .functype load_i32_from_i64 () -> (i32)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get globalI64
+; CHECK-NEXT: i32.wrap_i64
; CHECK-NEXT: # fallthrough-return
%v = load i32, ptr addrspace(1) @globalI64
ret i32 %v
@@ -157,8 +164,7 @@ define i8 @load_i8() {
; CHECK-LABEL: load_i8:
; CHECK: .functype load_i8 () -> (i32)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i32.load8_u globalI8
+; CHECK-NEXT: global.get globalI8
; CHECK-NEXT: # fallthrough-return
%v = load i8, ptr addrspace(1) @globalI8
ret i8 %v
@@ -168,8 +174,10 @@ define i64 @load_i16_from_i8_zext_to_i64() {
; CHECK-LABEL: load_i16_from_i8_zext_to_i64:
; CHECK: .functype load_i16_from_i8_zext_to_i64 () -> (i64)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: i64.load16_u globalI8
+; CHECK-NEXT: global.get globalI8
+; CHECK-NEXT: i64.extend_i32_u
+; CHECK-NEXT: i64.const 65535
+; CHECK-NEXT: i64.and
; CHECK-NEXT: # fallthrough-return
%v = load i16, ptr addrspace(1) @globalI8
%e = zext i16 %v to i64