[llvm] r358613 - [GlobalISel] Add legalization support for non-power-2 loads and stores
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 16:05:31 PDT 2019
I’m taking a look, will revert if the fix doesn’t look obvious.
Thanks,
Amara
> On Apr 18, 2019, at 1:03 PM, Adhemerval Zanella <adhemerval.zanella at linaro.org> wrote:
>
> This caused* ...
>
>> On 18/04/2019 16:59, Adhemerval Zanella wrote:
>> This caused a test-suite regression on the aarch64 Linux bot [1]. More specifically:
>>
>> FAIL: test-suite::Obsequi.test
>>
>> The test now takes more than 10 minutes, where it was taking less than 10
>> seconds.
>
> To be more specific, the consumer-typeset.test failure I am still investigating.
>
>>
>> [1] http://lab.llvm.org:8011/builders/clang-cmake-aarch64-lld/builds/6426
>>
>>> On 17/04/2019 18:30, Amara Emerson via llvm-commits wrote:
>>> Author: aemerson
>>> Date: Wed Apr 17 14:30:07 2019
>>> New Revision: 358613
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=358613&view=rev
>>> Log:
>>> [GlobalISel] Add legalization support for non-power-2 loads and stores
>>>
>>> Legalize things like i24 load/store by splitting them into smaller power of 2 operations.
>>>
>>> This matches how SelectionDAG handles these operations.
>>>
>>> Differential Revision: https://reviews.llvm.org/D59971
>>>
>>> Added:
>>> llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
>>> Modified:
>>> llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
>>> llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
>>> llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
>>> llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
>>>
>>> Modified: llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h?rev=358613&r1=358612&r2=358613&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h (original)
>>> +++ llvm/trunk/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h Wed Apr 17 14:30:07 2019
>>> @@ -639,6 +639,10 @@ public:
>>> return actionIf(LegalizeAction::Unsupported,
>>> LegalityPredicates::memSizeInBytesNotPow2(0));
>>> }
>>> + LegalizeRuleSet &lowerIfMemSizeNotPow2() {
>>> + return actionIf(LegalizeAction::Lower,
>>> + LegalityPredicates::memSizeInBytesNotPow2(0));
>>> + }
>>>
>>> LegalizeRuleSet &customIf(LegalityPredicate Predicate) {
>>> // We have no choice but conservatively assume that a custom action with a
>>>
>>> Modified: llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp?rev=358613&r1=358612&r2=358613&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (original)
>>> +++ llvm/trunk/lib/CodeGen/GlobalISel/LegalizerHelper.cpp Wed Apr 17 14:30:07 2019
>>> @@ -1484,10 +1484,56 @@ LegalizerHelper::lower(MachineInstr &MI,
>>> auto &MMO = **MI.memoperands_begin();
>>>
>>> if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
>>> - // In the case of G_LOAD, this was a non-extending load already and we're
>>> - // about to lower to the same instruction.
>>> - if (MI.getOpcode() == TargetOpcode::G_LOAD)
>>> + if (MI.getOpcode() == TargetOpcode::G_LOAD) {
>>> + // This load needs splitting into power of 2 sized loads.
>>> + if (DstTy.isVector())
>>> return UnableToLegalize;
>>> + if (isPowerOf2_32(DstTy.getSizeInBits()))
>>> + return UnableToLegalize; // Don't know what we're being asked to do.
>>> +
>>> + // Our strategy here is to generate anyextending loads for the smaller
>>> + // types up to next power-2 result type, and then combine the two larger
>>> + // result values together, before truncating back down to the non-pow-2
>>> + // type.
>>> + // E.g. v1 = i24 load =>
>>> + // v2 = i32 load (2 byte)
>>> + // v3 = i32 load (1 byte)
>>> + // v4 = i32 shl v2, 16
>>> + // v5 = i32 or v4, v3
>>> + // v1 = i24 trunc v5
>>> + // By doing this we generate the correct truncate which should get
>>> + // combined away as an artifact with a matching extend.
>>> + uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
>>> + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
>>> +
>>> + MachineFunction &MF = MIRBuilder.getMF();
>>> + MachineMemOperand *LargeMMO =
>>> + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
>>> + MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
>>> + &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
>>> +
>>> + LLT PtrTy = MRI.getType(PtrReg);
>>> + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
>>> + LLT AnyExtTy = LLT::scalar(AnyExtSize);
>>> + unsigned LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
>>> + unsigned SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
>>> + auto LargeLoad =
>>> + MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
>>> +
>>> + auto OffsetCst =
>>> + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
>>> + unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy);
>>> + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
>>> + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
>>> + *SmallMMO);
>>> +
>>> + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
>>> + auto Shift = MIRBuilder.buildShl(AnyExtTy, LargeLoad, ShiftAmt);
>>> + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, SmallLoad);
>>> + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
>>> + MI.eraseFromParent();
>>> + return Legalized;
>>> + }
>>> MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
>>> MI.eraseFromParent();
>>> return Legalized;
>>> @@ -1516,6 +1562,51 @@ LegalizerHelper::lower(MachineInstr &MI,
>>>
>>> return UnableToLegalize;
>>> }
>>> + case TargetOpcode::G_STORE: {
>>> + // Lower a non-power of 2 store into multiple pow-2 stores.
>>> + // E.g. split an i24 store into an i16 store + i8 store.
>>> + // We do this by first extending the stored value to the next largest power
>>> + // of 2 type, and then using truncating stores to store the components.
>>> + // By doing this, likewise with G_LOAD, generate an extend that can be
>>> + // artifact-combined away instead of leaving behind extracts.
>>> + unsigned SrcReg = MI.getOperand(0).getReg();
>>> + unsigned PtrReg = MI.getOperand(1).getReg();
>>> + LLT SrcTy = MRI.getType(SrcReg);
>>> + MachineMemOperand &MMO = **MI.memoperands_begin();
>>> + if (SrcTy.getSizeInBits() != MMO.getSize() /* in bytes */ * 8)
>>> + return UnableToLegalize;
>>> + if (SrcTy.isVector())
>>> + return UnableToLegalize;
>>> + if (isPowerOf2_32(SrcTy.getSizeInBits()))
>>> + return UnableToLegalize; // Don't know what we're being asked to do.
>>> +
>>> + // Extend to the next pow-2.
>>> + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
>>> + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
>>> +
>>> + // Obtain the smaller value by shifting away the larger value.
>>> + uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
>>> + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
>>> + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
>>> + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
>>> +
>>> + // Generate the GEP and truncating stores.
>>> + LLT PtrTy = MRI.getType(PtrReg);
>>> + auto OffsetCst =
>>> + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
>>> + unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy);
>>> + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
>>> +
>>> + MachineFunction &MF = MIRBuilder.getMF();
>>> + MachineMemOperand *LargeMMO =
>>> + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
>>> + MachineMemOperand *SmallMMO =
>>> + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
>>> + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
>>> + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
>>> + MI.eraseFromParent();
>>> + return Legalized;
>>> + }
>>> case TargetOpcode::G_CTLZ_ZERO_UNDEF:
>>> case TargetOpcode::G_CTTZ_ZERO_UNDEF:
>>> case TargetOpcode::G_CTLZ:
>>>
>>> Modified: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp?rev=358613&r1=358612&r2=358613&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp (original)
>>> +++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp Wed Apr 17 14:30:07 2019
>>> @@ -234,14 +234,12 @@ AArch64LegalizerInfo::AArch64LegalizerIn
>>> .legalForTypesWithMemDesc({{s32, p0, 8, 8},
>>> {s32, p0, 16, 8}})
>>> .clampScalar(0, s8, s64)
>>> - .widenScalarToNextPow2(0)
>>> - // TODO: We could support sum-of-pow2's but the lowering code doesn't know
>>> - // how to do that yet.
>>> - .unsupportedIfMemSizeNotPow2()
>>> + .lowerIfMemSizeNotPow2()
>>> // Lower any any-extending loads left into G_ANYEXT and G_LOAD
>>> .lowerIf([=](const LegalityQuery &Query) {
>>> return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
>>> })
>>> + .widenScalarToNextPow2(0)
>>> .clampMaxNumElements(0, s32, 2)
>>> .clampMaxNumElements(0, s64, 1)
>>> .customIf(IsPtrVecPred);
>>> @@ -249,6 +247,8 @@ AArch64LegalizerInfo::AArch64LegalizerIn
>>> getActionDefinitionsBuilder(G_STORE)
>>> .legalForTypesWithMemDesc({{s8, p0, 8, 8},
>>> {s16, p0, 16, 8},
>>> + {s32, p0, 8, 8},
>>> + {s32, p0, 16, 8},
>>> {s32, p0, 32, 8},
>>> {s64, p0, 64, 8},
>>> {p0, p0, 64, 8},
>>> @@ -259,10 +259,7 @@ AArch64LegalizerInfo::AArch64LegalizerIn
>>> {v4s32, p0, 128, 8},
>>> {v2s64, p0, 128, 8}})
>>> .clampScalar(0, s8, s64)
>>> - .widenScalarToNextPow2(0)
>>> - // TODO: We could support sum-of-pow2's but the lowering code doesn't know
>>> - // how to do that yet.
>>> - .unsupportedIfMemSizeNotPow2()
>>> + .lowerIfMemSizeNotPow2()
>>> .lowerIf([=](const LegalityQuery &Query) {
>>> return Query.Types[0].isScalar() &&
>>> Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
>>>
>>> Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll?rev=358613&r1=358612&r2=358613&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll (original)
>>> +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll Wed Apr 17 14:30:07 2019
>>> @@ -54,26 +54,6 @@ false:
>>>
>>> }
>>>
>>> -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s32) = G_LOAD %1:_(p0) :: (load 3 from `i24* undef`, align 1) (in function: odd_type_load)
>>> -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type_load
>>> -; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type_load
>>> -define i32 @odd_type_load() {
>>> -entry:
>>> - %ld = load i24, i24* undef, align 1
>>> - %cst = zext i24 %ld to i32
>>> - ret i32 %cst
>>> -}
>>> -
>>> - ; General legalizer inability to handle types whose size wasn't a power of 2.
>>> -; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(s42), %0:_(p0) :: (store 6 into %ir.addr, align 8) (in function: odd_type)
>>> -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type
>>> -; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type:
>>> -define void @odd_type(i42* %addr) {
>>> - %val42 = load i42, i42* %addr
>>> - store i42 %val42, i42* %addr
>>> - ret void
>>> -}
>>> -
>>> ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(<7 x s32>), %0:_(p0) :: (store 28 into %ir.addr, align 32) (in function: odd_vector)
>>> ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
>>> ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
>>>
>>> Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir?rev=358613&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir (added)
>>> +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir Wed Apr 17 14:30:07 2019
>>> @@ -0,0 +1,49 @@
>>> +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
>>> +# RUN: llc -march=aarch64 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s
>>> +--- |
>>> + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
>>> + target triple = "aarch64"
>>> +
>>> + define i32 @load_store_test(i24* %ptr, i24* %ptr2) {
>>> + %val = load i24, i24* %ptr
>>> + store i24 %val, i24* %ptr2
>>> + ret i32 0
>>> + }
>>> +
>>> +...
>>> +---
>>> +name: load_store_test
>>> +alignment: 2
>>> +tracksRegLiveness: true
>>> +body: |
>>> + bb.1 (%ir-block.0):
>>> + liveins: $x0, $x1
>>> +
>>> + ; CHECK-LABEL: name: load_store_test
>>> + ; CHECK: liveins: $x0, $x1
>>> + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
>>> + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
>>> + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
>>> + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2 from %ir.ptr, align 4)
>>> + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
>>> + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64)
>>> + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p0) :: (load 1 from %ir.ptr + 2, align 4)
>>> + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
>>> + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32)
>>> + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LOAD1]]
>>> + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
>>> + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C2]](s32)
>>> + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C1]](s64)
>>> + ; CHECK: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store 2 into %ir.ptr2, align 4)
>>> + ; CHECK: G_STORE [[LSHR]](s32), [[GEP1]](p0) :: (store 1 into %ir.ptr2 + 2, align 4)
>>> + ; CHECK: $w0 = COPY [[C]](s32)
>>> + ; CHECK: RET_ReallyLR implicit $w0
>>> + %0:_(p0) = COPY $x0
>>> + %1:_(p0) = COPY $x1
>>> + %3:_(s32) = G_CONSTANT i32 0
>>> + %2:_(s24) = G_LOAD %0(p0) :: (load 3 from %ir.ptr, align 4)
>>> + G_STORE %2(s24), %1(p0) :: (store 3 into %ir.ptr2, align 4)
>>> + $w0 = COPY %3(s32)
>>> + RET_ReallyLR implicit $w0
>>> +
>>> +...
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>
More information about the llvm-commits
mailing list