[llvm] 47269da - GlobalISel: Handle lowering non-power-of-2 extloads
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 14 08:54:18 PDT 2021
Author: Matt Arsenault
Date: 2021-07-14T11:54:11-04:00
New Revision: 47269da5d83e079a565439cbbacd4bb119ed0aff
URL: https://github.com/llvm/llvm-project/commit/47269da5d83e079a565439cbbacd4bb119ed0aff
DIFF: https://github.com/llvm/llvm-project/commit/47269da5d83e079a565439cbbacd4bb119ed0aff.diff
LOG: GlobalISel: Handle lowering non-power-of-2 extloads
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b31cf3f386244..354cecf5591bd 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2849,60 +2849,62 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
return Legalized;
}
- if (DstTy.getSizeInBits() != MMO.getSizeInBits())
+ // This load needs splitting into power of 2 sized loads.
+ if (DstTy.isVector())
return UnableToLegalize;
+ if (isPowerOf2_32(MemSizeInBits))
+ return UnableToLegalize; // Don't know what we're being asked to do.
- if (MI.getOpcode() == TargetOpcode::G_LOAD) {
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(DstTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
- // Our strategy here is to generate anyextending loads for the smaller
- // types up to next power-2 result type, and then combine the two larger
- // result values together, before truncating back down to the non-pow-2
- // type.
- // E.g. v1 = i24 load =>
- // v2 = i32 zextload (2 byte)
- // v3 = i32 load (1 byte)
- // v4 = i32 shl v3, 16
- // v5 = i32 or v4, v2
- // v1 = i24 trunc v5
- // By doing this we generate the correct truncate which should get
- // combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
- uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+ // Big endian lowering not implemented.
+ if (MIRBuilder.getDataLayout().isBigEndian())
+ return UnableToLegalize;
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
- &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
-
- LLT PtrTy = MRI.getType(PtrReg);
- unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
- LLT AnyExtTy = LLT::scalar(AnyExtSize);
- auto LargeLoad = MIRBuilder.buildLoadInstr(
- TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);
-
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
- auto SmallLoad = MIRBuilder.buildLoad(AnyExtTy, SmallPtr,
- *SmallMMO);
-
- auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
- auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to next power-2 result type, and then combine the two larger
+ // result values together, before truncating back down to the non-pow-2
+ // type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 zextload (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+ uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
+
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
+ &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ auto LargeLoad = MIRBuilder.buildLoadInstr(
+ TargetOpcode::G_ZEXTLOAD, AnyExtTy, PtrReg, *LargeMMO);
+
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
+ auto SmallLoad = MIRBuilder.buildLoadInstr(
+ MI.getOpcode(), AnyExtTy, SmallPtr, *SmallMMO);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+
+ if (AnyExtTy == DstTy)
+ MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
+ else {
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildTrunc(DstReg, {Or});
- MI.eraseFromParent();
- return Legalized;
}
- return UnableToLegalize;
+ MI.eraseFromParent();
+ return Legalized;
}
LegalizerHelper::LegalizeResult
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 31a7c88442f0b..f8bba5a2596ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1274,6 +1274,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.lower();
}
+ // FIXME: Unaligned accesses not lowered.
auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
.legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8},
{S32, GlobalPtr, S16, 2 * 8},
@@ -1302,7 +1303,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
ExtLoads.clampScalar(0, S32, S32)
.widenScalarToNextPow2(0)
- .unsupportedIfMemSizeNotPow2()
.lower();
auto &Atomics = getActionDefinitionsBuilder(
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index ac2d561c60873..29a3b22cf79a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -15721,6 +15721,374 @@ body: |
$vgpr0 = COPY %1
...
+---
+name: test_ext_load_global_s32_from_s24_align1
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align1
+ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+ ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+ ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+ ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+ ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+ ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+ ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+ ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+ ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+ ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+ ; SI: $vgpr0 = COPY [[COPY4]](s32)
+ ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1
+ ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+ ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+ ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+ ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; CI-HSA: $vgpr0 = COPY [[OR]](s32)
+ ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1
+ ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+ ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+ ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+ ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CI-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+ ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+ ; CI-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+ ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+ ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+ ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+ ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+ ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+ ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32)
+ ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+ ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+ ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32)
+ ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[COPY4]](s32)
+ ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align1
+ ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+ ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+ ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+ ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+ ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+ ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+ ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+ ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+ ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+ ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+ ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+ ; VI: $vgpr0 = COPY [[COPY1]](s32)
+ ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align1
+ ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
+ ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+ ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+ ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX9-HSA: $vgpr0 = COPY [[OR]](s32)
+ ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align1
+ ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+ ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+ ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s8) from unknown-address + 2, addrspace 1)
+ ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
+ ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+ ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C2]]
+ ; GFX9-MESA: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C3]](s16)
+ ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+ ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
+ ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C2]]
+ ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C2]]
+ ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16)
+ ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+ ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; GFX9-MESA: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32)
+ ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+ ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_LOAD %0 :: (load (s24), align 1, addrspace 1)
+ $vgpr0 = COPY %1
+...
+---
+name: test_ext_load_global_s32_from_s24_align2
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align2
+ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+ ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+ ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+ ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; SI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+ ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+ ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+ ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+ ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+ ; SI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+ ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+ ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+ ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+ ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+ ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+ ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+ ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+ ; SI: $vgpr0 = COPY [[COPY5]](s32)
+ ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2
+ ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+ ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+ ; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; CI-HSA: $vgpr0 = COPY [[OR]](s32)
+ ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2
+ ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+ ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; CI-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CI-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+ ; CI-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+ ; CI-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CI-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+ ; CI-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CI-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; CI-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+ ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; CI-MESA: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; CI-MESA: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+ ; CI-MESA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY1]](s32)
+ ; CI-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+ ; CI-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
+ ; CI-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+ ; CI-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+ ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+ ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+ ; CI-MESA: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
+ ; CI-MESA: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY3]](s32)
+ ; CI-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+ ; CI-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC3]]
+ ; CI-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; CI-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; CI-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+ ; CI-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; CI-MESA: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+ ; CI-MESA: $vgpr0 = COPY [[COPY5]](s32)
+ ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align2
+ ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+ ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+ ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+ ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+ ; VI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+ ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+ ; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+ ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+ ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+ ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+ ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+ ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+ ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+ ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+ ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+ ; VI: $vgpr0 = COPY [[COPY1]](s32)
+ ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align2
+ ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+ ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
+ ; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX9-HSA: $vgpr0 = COPY [[OR]](s32)
+ ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align2
+ ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), addrspace 1)
+ ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; GFX9-MESA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9-MESA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
+ ; GFX9-MESA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9-MESA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
+ ; GFX9-MESA: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; GFX9-MESA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C3]](s32)
+ ; GFX9-MESA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GFX9-MESA: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
+ ; GFX9-MESA: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]]
+ ; GFX9-MESA: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+ ; GFX9-MESA: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]]
+ ; GFX9-MESA: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; GFX9-MESA: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16)
+ ; GFX9-MESA: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
+ ; GFX9-MESA: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
+ ; GFX9-MESA: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]]
+ ; GFX9-MESA: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32)
+ ; GFX9-MESA: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]]
+ ; GFX9-MESA: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16)
+ ; GFX9-MESA: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
+ ; GFX9-MESA: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; GFX9-MESA: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32)
+ ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]]
+ ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+ ; GFX9-MESA: $vgpr0 = COPY [[COPY1]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_LOAD %0 :: (load (s24), align 2, addrspace 1)
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_ext_load_global_s32_from_s24_align4
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; SI-LABEL: name: test_ext_load_global_s32_from_s24_align4
+ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+ ; SI: $vgpr0 = COPY [[LOAD]](s32)
+ ; CI-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4
+ ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+ ; CI-HSA: $vgpr0 = COPY [[LOAD]](s32)
+ ; CI-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4
+ ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+ ; CI-MESA: $vgpr0 = COPY [[LOAD]](s32)
+ ; VI-LABEL: name: test_ext_load_global_s32_from_s24_align4
+ ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+ ; VI: $vgpr0 = COPY [[LOAD]](s32)
+ ; GFX9-HSA-LABEL: name: test_ext_load_global_s32_from_s24_align4
+ ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+ ; GFX9-HSA: $vgpr0 = COPY [[LOAD]](s32)
+ ; GFX9-MESA-LABEL: name: test_ext_load_global_s32_from_s24_align4
+ ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+ ; GFX9-MESA: $vgpr0 = COPY [[LOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_LOAD %0 :: (load (s24), align 4, addrspace 1)
+ $vgpr0 = COPY %1
+...
+
---
name: test_ext_load_global_s64_from_1_align4
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
index 9c4b23fe15dc4..7b9ede5651cea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
@@ -6,7 +6,6 @@
# FIXME: Run with and without unaligned access turned on
# ERR-NOT: remark
-# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s32) = G_SEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_sextload_global_i32_i24)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16)
@@ -64,12 +63,24 @@ body: |
; GFX8-LABEL: name: test_sextload_global_i32_i24
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
- ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+ ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX8: $vgpr0 = COPY [[OR]](s32)
; GFX6-LABEL: name: test_sextload_global_i32_i24
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
- ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+ ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX6: $vgpr0 = COPY [[OR]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load (s24), addrspace 1)
$vgpr0 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
index 6e89156519ec3..defa85abd1475 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
@@ -6,7 +6,6 @@
# FIXME: Run with and without unaligned access turned on
# ERR-NOT: remark
-# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s32) = G_ZEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_zextload_global_i32_i24)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i16_from_2)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i32_from_2)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s32), addrspace 1) (in function: test_zextload_global_v2i32_from_4)
@@ -65,12 +64,24 @@ body: |
; GFX8-LABEL: name: test_zextload_global_i32_i24
; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
- ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+ ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX8: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX8: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+ ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX8: $vgpr0 = COPY [[OR]](s32)
; GFX6-LABEL: name: test_zextload_global_i32_i24
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
- ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
+ ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX6: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 2, align 2, addrspace 1)
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX6: $vgpr0 = COPY [[OR]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s24), addrspace 1)
$vgpr0 = COPY %1
More information about the llvm-commits mailing list