[llvm] r362539 - Revert r362472 as it is breaking PPC build bots
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 4 11:48:43 PDT 2019
Author: nemanjai
Date: Tue Jun 4 11:48:43 2019
New Revision: 362539
URL: http://llvm.org/viewvc/llvm-project?rev=362539&view=rev
Log:
Revert r362472 as it is breaking PPC build bots
The patch https://reviews.llvm.org/rL362472 broke PPC LNT buildbots.
Reverting it to bring the bots back to green.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/PowerPC/store-combine.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=362539&r1=362538&r2=362539&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jun 4 11:48:43 2019
@@ -524,7 +524,6 @@ namespace {
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
- SDValue MatchStoreCombine(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -6276,180 +6275,6 @@ static Optional<bool> isBigEndian(const
return BigEndian;
}
-static SDValue stripTruncAndExt(SDValue Value) {
- switch (Value.getOpcode()) {
- case ISD::TRUNCATE:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::ANY_EXTEND:
- return stripTruncAndExt(Value.getOperand(0));
- }
- return Value;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the targets
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
- // Collect all the stores in the chain.
- SDValue Chain;
- SmallVector<StoreSDNode *, 8> Stores;
- for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
- if (Store->getMemoryVT() != MVT::i8 ||
- Store->isVolatile() || Store->isIndexed())
- return SDValue();
- Stores.push_back(Store);
- Chain = Store->getChain();
- }
- // Handle the simple type only.
- unsigned Width = Stores.size();
- EVT VT = EVT::getIntegerVT(
- *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
- return SDValue();
-
- // Check if all the bytes of the combined value we are looking at are stored
- // to the same base address. Collect bytes offsets from Base address into
- // ByteOffsets.
- SDValue CombinedValue;
- SmallVector<int64_t, 4> ByteOffsets(Width);
- int64_t FirstOffset = INT64_MAX;
- StoreSDNode *FirstStore = nullptr;
- Optional<BaseIndexOffset> Base;
- for (auto Store : Stores) {
- // All the stores store different byte of the CombinedValue. A truncate is
- // required to get that byte value.
- SDValue Trunc = Store->getValue();
- if (Trunc.getOpcode() != ISD::TRUNCATE)
- return SDValue();
- // A shift operation is required to get the right byte offset, except the
- // first byte.
- int64_t Offset = 0;
- SDValue Value = Trunc.getOperand(0);
- if (Value.getOpcode() == ISD::SRL ||
- Value.getOpcode() == ISD::SRA) {
- ConstantSDNode *ShiftOffset =
- dyn_cast<ConstantSDNode>(Value.getOperand(1));
- // Trying to match the following pattern. The shift offset must be
- // a constant and a multiple of 8. It is the byte offset in "y".
- //
- // x = srl y, offset
- // i8 z = trunc x
- // store z, ...
- if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
- return SDValue();
-
- Offset = ShiftOffset->getSExtValue()/8;
- Value = Value.getOperand(0);
- }
-
- // Stores must share the same combined value with different offsets.
- if (!CombinedValue)
- CombinedValue = Value;
- else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
- return SDValue();
-
- // The trunc and all the extend operation should be stripped to get the
- // real value we are stored.
- else if (CombinedValue.getValueType() != VT) {
- if (Value.getValueType() == VT ||
- Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
- CombinedValue = Value;
- // Give up if the combined value type is smaller than the store size.
- if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
- return SDValue();
- }
-
- // Stores must share the same base address
- BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
- int64_t ByteOffsetFromBase = 0;
- if (!Base)
- Base = Ptr;
- else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
- return SDValue();
-
- // Remember the first byte store
- if (ByteOffsetFromBase < FirstOffset) {
- FirstStore = Store;
- FirstOffset = ByteOffsetFromBase;
- }
- // Map the offset in the store and the offset in the combined value.
- if (Offset < 0 || Offset >= Width)
- return SDValue();
- ByteOffsets[Offset] = ByteOffsetFromBase;
- }
-
- assert(FirstOffset != INT64_MAX && "First byte offset must be set");
- assert(FirstStore && "First store must be set");
-
- // Check if the bytes of the combined value we are looking at match with
- // either big or little endian value store.
- Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
- if (!IsBigEndian.hasValue())
- return SDValue();
-
- // The node we are looking at matches with the pattern, check if we can
- // replace it with a single bswap if needed and store.
-
- // If the store needs byte swap check if the target supports it
- bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
-
- // Before legalize we can introduce illegal bswaps which will be later
- // converted to an explicit bswap sequence. This way we end up with a single
- // store and byte shuffling instead of several stores and byte shuffling.
- if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
- return SDValue();
-
- // Check that a store of the wide type is both allowed and fast on the target
- bool Fast = false;
- bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- VT, FirstStore->getAddressSpace(),
- FirstStore->getAlignment(), &Fast);
- if (!Allowed || !Fast)
- return SDValue();
-
- if (VT != CombinedValue.getValueType()) {
- assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
- "Get unexpected store value to combine");
- CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
- CombinedValue);
- }
-
- if (NeedsBswap)
- CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
-
- SDValue NewStore =
- DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
- FirstStore->getPointerInfo(), FirstStore->getAlignment());
-
- // Rely on other DAG combine rules to remove the other individual stores.
- DAG.ReplaceAllUsesWith(N, NewStore.getNode());
- return NewStore;
-}
-
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
@@ -15968,10 +15793,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
- // Try transforming several stores into STORE (BSWAP).
- if (SDValue Store = MatchStoreCombine(ST))
- return Store;
-
if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
Modified: llvm/trunk/test/CodeGen/PowerPC/store-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/store-combine.ll?rev=362539&r1=362538&r2=362539&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/store-combine.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/store-combine.ll Tue Jun 4 11:48:43 2019
@@ -10,12 +10,24 @@
define void @store_i32_by_i8(i32 signext %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: stw 3, 0(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 3, 0(4)
+; CHECK-PPC64LE-NEXT: stb 5, 1(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, 2(4)
+; CHECK-PPC64LE-NEXT: stb 3, 3(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: stwbrx 3, 0, 4
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 3, 0(4)
+; CHECK-PPC64-NEXT: stb 5, 1(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 16
+; CHECK-PPC64-NEXT: srwi 3, 3, 24
+; CHECK-PPC64-NEXT: stb 5, 2(4)
+; CHECK-PPC64-NEXT: stb 3, 3(4)
; CHECK-PPC64-NEXT: blr
entry:
%conv = trunc i32 %m to i8
@@ -43,12 +55,24 @@ entry:
define void @store_i32_by_i8_bswap(i32 signext %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: stwbrx 3, 0, 4
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, 0(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: stb 5, 1(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 5, 2(4)
+; CHECK-PPC64LE-NEXT: stb 3, 3(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_bswap:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: stw 3, 0(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 24
+; CHECK-PPC64-NEXT: srwi 6, 3, 16
+; CHECK-PPC64-NEXT: stb 5, 0(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 6, 1(4)
+; CHECK-PPC64-NEXT: stb 5, 2(4)
+; CHECK-PPC64-NEXT: stb 3, 3(4)
; CHECK-PPC64-NEXT: blr
entry:
%0 = lshr i32 %m, 24
@@ -80,12 +104,40 @@ entry:
define void @store_i64_by_i8(i64 %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i64_by_i8:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: stdx 3, 0, 4
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8
+; CHECK-PPC64LE-NEXT: stb 3, 0(4)
+; CHECK-PPC64LE-NEXT: stb 5, 1(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16
+; CHECK-PPC64LE-NEXT: stb 5, 2(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24
+; CHECK-PPC64LE-NEXT: stb 5, 3(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32
+; CHECK-PPC64LE-NEXT: stb 5, 4(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40
+; CHECK-PPC64LE-NEXT: stb 5, 5(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56
+; CHECK-PPC64LE-NEXT: stb 5, 6(4)
+; CHECK-PPC64LE-NEXT: stb 3, 7(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i64_by_i8:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: stdbrx 3, 0, 4
+; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8
+; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16
+; CHECK-PPC64-NEXT: stb 5, 1(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24
+; CHECK-PPC64-NEXT: stb 6, 2(4)
+; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32
+; CHECK-PPC64-NEXT: stb 5, 3(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40
+; CHECK-PPC64-NEXT: stb 6, 4(4)
+; CHECK-PPC64-NEXT: stb 3, 0(4)
+; CHECK-PPC64-NEXT: stb 5, 5(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48
+; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56
+; CHECK-PPC64-NEXT: stb 5, 6(4)
+; CHECK-PPC64-NEXT: stb 3, 7(4)
; CHECK-PPC64-NEXT: blr
entry:
%conv = trunc i64 %m to i8
@@ -133,12 +185,40 @@ entry:
define void @store_i64_by_i8_bswap(i64 %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: stdbrx 3, 0, 4
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8
+; CHECK-PPC64LE-NEXT: stb 3, 7(4)
+; CHECK-PPC64LE-NEXT: stb 5, 6(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16
+; CHECK-PPC64LE-NEXT: stb 5, 5(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24
+; CHECK-PPC64LE-NEXT: stb 5, 4(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32
+; CHECK-PPC64LE-NEXT: stb 5, 3(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40
+; CHECK-PPC64LE-NEXT: stb 5, 2(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56
+; CHECK-PPC64LE-NEXT: stb 5, 1(4)
+; CHECK-PPC64LE-NEXT: stb 3, 0(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i64_by_i8_bswap:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: stdx 3, 0, 4
+; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8
+; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16
+; CHECK-PPC64-NEXT: stb 5, 6(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24
+; CHECK-PPC64-NEXT: stb 6, 5(4)
+; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32
+; CHECK-PPC64-NEXT: stb 5, 4(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40
+; CHECK-PPC64-NEXT: stb 6, 3(4)
+; CHECK-PPC64-NEXT: stb 3, 7(4)
+; CHECK-PPC64-NEXT: stb 5, 2(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48
+; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56
+; CHECK-PPC64-NEXT: stb 5, 1(4)
+; CHECK-PPC64-NEXT: stb 3, 0(4)
; CHECK-PPC64-NEXT: blr
entry:
%conv = trunc i64 %m to i8
@@ -187,18 +267,46 @@ entry:
define void @store_i64_by_i8_bswap_uses(i32 signext %t, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap_uses:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: slwi [[REG:[0-9]+]], 3, 3
-; CHECK-PPC64LE-NEXT: subf [[REG1:[0-9]+]], 3, [[REG]]
-; CHECK-PPC64LE-NEXT: extsw [[REG2:[0-9]+]], [[REG1]]
-; CHECK-PPC64LE-NEXT: stdbrx [[REG2]], 0, 4
+; CHECK-PPC64LE-NEXT: slwi 5, 3, 3
+; CHECK-PPC64LE-NEXT: subf 3, 3, 5
+; CHECK-PPC64LE-NEXT: extsw 3, 3
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 56, 8
+; CHECK-PPC64LE-NEXT: stb 3, 7(4)
+; CHECK-PPC64LE-NEXT: stb 5, 6(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 48, 16
+; CHECK-PPC64LE-NEXT: stb 5, 5(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 40, 24
+; CHECK-PPC64LE-NEXT: stb 5, 4(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 32, 32
+; CHECK-PPC64LE-NEXT: stb 5, 3(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 24, 40
+; CHECK-PPC64LE-NEXT: stb 5, 2(4)
+; CHECK-PPC64LE-NEXT: rldicl 5, 3, 16, 48
+; CHECK-PPC64LE-NEXT: rldicl 3, 3, 8, 56
+; CHECK-PPC64LE-NEXT: stb 5, 1(4)
+; CHECK-PPC64LE-NEXT: stb 3, 0(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i64_by_i8_bswap_uses:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: slwi [[REG:[0-9]+]], 3, 3
-; CHECK-PPC64-NEXT: subf [[REG1:[0-9]+]], 3, [[REG]]
-; CHECK-PPC64-NEXT: extsw [[REG2:[0-9]+]], [[REG1]]
-; CHECK-PPC64-NEXT: stdx [[REG2]], 0, 4
+; CHECK-PPC64-NEXT: slwi 5, 3, 3
+; CHECK-PPC64-NEXT: subf 3, 3, 5
+; CHECK-PPC64-NEXT: extsw 3, 3
+; CHECK-PPC64-NEXT: rldicl 5, 3, 56, 8
+; CHECK-PPC64-NEXT: rldicl 6, 3, 48, 16
+; CHECK-PPC64-NEXT: stb 5, 6(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 40, 24
+; CHECK-PPC64-NEXT: stb 6, 5(4)
+; CHECK-PPC64-NEXT: rldicl 6, 3, 32, 32
+; CHECK-PPC64-NEXT: stb 5, 4(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 24, 40
+; CHECK-PPC64-NEXT: stb 6, 3(4)
+; CHECK-PPC64-NEXT: stb 3, 7(4)
+; CHECK-PPC64-NEXT: stb 5, 2(4)
+; CHECK-PPC64-NEXT: rldicl 5, 3, 16, 48
+; CHECK-PPC64-NEXT: rldicl 3, 3, 8, 56
+; CHECK-PPC64-NEXT: stb 5, 1(4)
+; CHECK-PPC64-NEXT: stb 3, 0(4)
; CHECK-PPC64-NEXT: blr
entry:
%mul = mul nsw i32 %t, 7
@@ -248,11 +356,25 @@ entry:
define void @store_i32_by_i8_bswap_volatile(i32 signext %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_volatile:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NOT: stwbrx
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 3, 3(4)
+; CHECK-PPC64LE-NEXT: stb 5, 2(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, 1(4)
+; CHECK-PPC64LE-NEXT: stb 3, 0(4)
+; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_volatile:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NOT: stw
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 3, 3(4)
+; CHECK-PPC64-NEXT: stb 5, 2(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 16
+; CHECK-PPC64-NEXT: srwi 3, 3, 24
+; CHECK-PPC64-NEXT: stb 5, 1(4)
+; CHECK-PPC64-NEXT: stb 3, 0(4)
+; CHECK-PPC64-NEXT: blr
entry:
%conv = trunc i32 %m to i8
%arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -281,11 +403,29 @@ entry:
define void @store_i32_by_i8_bswap_store_in_between(i32 signext %m, i8* %p, i8* %q) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_store_in_between:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NOT: stwbrx
+; CHECK-PPC64LE-NEXT: srwi 6, 3, 8
+; CHECK-PPC64LE-NEXT: stb 3, 3(4)
+; CHECK-PPC64LE-NEXT: stb 6, 2(4)
+; CHECK-PPC64LE-NEXT: li 6, 3
+; CHECK-PPC64LE-NEXT: stb 6, 0(5)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, 1(4)
+; CHECK-PPC64LE-NEXT: stb 3, 0(4)
+; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_store_in_between:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NOT: stw
+; CHECK-PPC64-NEXT: li 6, 3
+; CHECK-PPC64-NEXT: srwi 7, 3, 8
+; CHECK-PPC64-NEXT: stb 7, 2(4)
+; CHECK-PPC64-NEXT: stb 3, 3(4)
+; CHECK-PPC64-NEXT: stb 6, 0(5)
+; CHECK-PPC64-NEXT: srwi 5, 3, 16
+; CHECK-PPC64-NEXT: srwi 3, 3, 24
+; CHECK-PPC64-NEXT: stb 5, 1(4)
+; CHECK-PPC64-NEXT: stb 3, 0(4)
+; CHECK-PPC64-NEXT: blr
entry:
%conv = trunc i32 %m to i8
%arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -308,11 +448,25 @@ entry:
define void @store_i32_by_i8_bswap_unrelated_store(i32 signext %m, i8* %p, i8* %q) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_unrelated_store:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NOT: stwbrx
+; CHECK-PPC64LE-NEXT: srwi 6, 3, 8
+; CHECK-PPC64LE-NEXT: stb 3, 3(4)
+; CHECK-PPC64LE-NEXT: stb 6, 2(5)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, 1(4)
+; CHECK-PPC64LE-NEXT: stb 3, 0(4)
+; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_unrelated_store:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NOT: stw
+; CHECK-PPC64-NEXT: srwi 6, 3, 8
+; CHECK-PPC64-NEXT: stb 3, 3(4)
+; CHECK-PPC64-NEXT: stb 6, 2(5)
+; CHECK-PPC64-NEXT: srwi 5, 3, 16
+; CHECK-PPC64-NEXT: srwi 3, 3, 24
+; CHECK-PPC64-NEXT: stb 5, 1(4)
+; CHECK-PPC64-NEXT: stb 3, 0(4)
+; CHECK-PPC64-NEXT: blr
entry:
%conv = trunc i32 %m to i8
%arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -339,13 +493,24 @@ entry:
define void @store_i32_by_i8_bswap_nonzero_offset(i32 signext %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_nonzero_offset:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: addi [[REG1:[0-9]+]], 4, 1
-; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG1]]
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 5, 3(4)
+; CHECK-PPC64LE-NEXT: stb 3, 4(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, 2(4)
+; CHECK-PPC64LE-NEXT: stb 3, 1(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_nonzero_offset:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: stw 3, 1(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 3, 4(4)
+; CHECK-PPC64-NEXT: stb 5, 3(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 16
+; CHECK-PPC64-NEXT: srwi 3, 3, 24
+; CHECK-PPC64-NEXT: stb 5, 2(4)
+; CHECK-PPC64-NEXT: stb 3, 1(4)
; CHECK-PPC64-NEXT: blr
entry:
%0 = lshr i32 %m, 8
@@ -374,13 +539,24 @@ entry:
define void @store_i32_by_i8_neg_offset(i32 signext %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_neg_offset:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: stw 3, -4(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 5, -3(4)
+; CHECK-PPC64LE-NEXT: stb 3, -4(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, -2(4)
+; CHECK-PPC64LE-NEXT: stb 3, -1(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_neg_offset:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: addi [[REG1:[0-9]+]], 4, -4
-; CHECK-PPC64-NEXT: stwbrx 3, 0, [[REG1]]
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 3, -4(4)
+; CHECK-PPC64-NEXT: stb 5, -3(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 16
+; CHECK-PPC64-NEXT: srwi 3, 3, 24
+; CHECK-PPC64-NEXT: stb 5, -2(4)
+; CHECK-PPC64-NEXT: stb 3, -1(4)
; CHECK-PPC64-NEXT: blr
entry:
%0 = lshr i32 %m, 8
@@ -409,13 +585,24 @@ entry:
define void @store_i32_by_i8_bswap_neg_offset(i32 signext %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_neg_offset:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: addi [[REG1:[0-9]+]], 4, -4
-; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG1]]
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: stb 5, -3(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, -4(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 5, -2(4)
+; CHECK-PPC64LE-NEXT: stb 3, -1(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_neg_offset:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: stw 3, -4(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 16
+; CHECK-PPC64-NEXT: srwi 6, 3, 24
+; CHECK-PPC64-NEXT: stb 5, -3(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 6, -4(4)
+; CHECK-PPC64-NEXT: stb 5, -2(4)
+; CHECK-PPC64-NEXT: stb 3, -1(4)
; CHECK-PPC64-NEXT: blr
entry:
%0 = lshr i32 %m, 16
@@ -444,17 +631,28 @@ entry:
define void @store_i32_by_i8_bswap_base_index_offset(i32 %m, i32 %i, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_base_index_offset:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: extsw [[REG1:[0-9]+]], 4
-; CHECK-PPC64LE-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]]
-; CHECK-PPC64LE-NEXT: addi [[REG3:[0-9]+]], [[REG2]], -4
-; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG3]]
+; CHECK-PPC64LE-NEXT: extsw 4, 4
+; CHECK-PPC64LE-NEXT: srwi 6, 3, 16
+; CHECK-PPC64LE-NEXT: add 4, 5, 4
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 24
+; CHECK-PPC64LE-NEXT: stb 6, -3(4)
+; CHECK-PPC64LE-NEXT: stb 5, -4(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 5, -2(4)
+; CHECK-PPC64LE-NEXT: stb 3, -1(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_base_index_offset:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: extsw [[REG1:[0-9]+]], 4
-; CHECK-PPC64-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]]
-; CHECK-PPC64-NEXT: stw 3, -4([[REG2]])
+; CHECK-PPC64-NEXT: extsw 4, 4
+; CHECK-PPC64-NEXT: srwi 6, 3, 16
+; CHECK-PPC64-NEXT: add 4, 5, 4
+; CHECK-PPC64-NEXT: srwi 5, 3, 24
+; CHECK-PPC64-NEXT: stb 6, -3(4)
+; CHECK-PPC64-NEXT: srwi 6, 3, 8
+; CHECK-PPC64-NEXT: stb 5, -4(4)
+; CHECK-PPC64-NEXT: stb 6, -2(4)
+; CHECK-PPC64-NEXT: stb 3, -1(4)
; CHECK-PPC64-NEXT: blr
entry:
%0 = lshr i32 %m, 16
@@ -496,17 +694,28 @@ entry:
define void @store_i32_by_i8_bswap_complicated(i32 %m, i32 %i, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_complicated:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: extsw [[REG1:[0-9]+]], 4
-; CHECK-PPC64LE-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]]
-; CHECK-PPC64LE-NEXT: addi [[REG3:[0-9]+]], [[REG2]], 3
-; CHECK-PPC64LE-NEXT: stwbrx 3, 0, [[REG3]]
+; CHECK-PPC64LE-NEXT: extsw 4, 4
+; CHECK-PPC64LE-NEXT: add 4, 5, 4
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 24
+; CHECK-PPC64LE-NEXT: stb 5, 3(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT: stb 5, 4(4)
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 5, 5(4)
+; CHECK-PPC64LE-NEXT: stb 3, 6(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_complicated:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: extsw [[REG1:[0-9]+]], 4
-; CHECK-PPC64-NEXT: add [[REG2:[0-9]+]], 5, [[REG1]]
-; CHECK-PPC64-NEXT: stw 3, 3([[REG2]])
+; CHECK-PPC64-NEXT: extsw 4, 4
+; CHECK-PPC64-NEXT: srwi 6, 3, 24
+; CHECK-PPC64-NEXT: add 4, 5, 4
+; CHECK-PPC64-NEXT: srwi 5, 3, 16
+; CHECK-PPC64-NEXT: stb 6, 3(4)
+; CHECK-PPC64-NEXT: stb 5, 4(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 5, 5(4)
+; CHECK-PPC64-NEXT: stb 3, 6(4)
; CHECK-PPC64-NEXT: blr
entry:
%idx.ext = sext i32 %i to i64
@@ -536,12 +745,16 @@ entry:
define void @store_i16_by_i8_bswap(i16 %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_i16_by_i8_bswap:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: sthbrx 3, 0, 4
+; CHECK-PPC64LE-NEXT: srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT: stb 5, 0(4)
+; CHECK-PPC64LE-NEXT: stb 3, 1(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_i16_by_i8_bswap:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: sth 3, 0(4)
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 5, 0(4)
+; CHECK-PPC64-NEXT: stb 3, 1(4)
; CHECK-PPC64-NEXT: blr
entry:
%0 = lshr i16 %m, 8
@@ -558,12 +771,16 @@ entry:
define void @store_16_by_i8(i16 %m, i8* %p) {
; CHECK-PPC64LE-LABEL: store_16_by_i8:
; CHECK-PPC64LE: # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT: sth 3, 0(4)
+; CHECK-PPC64LE-NEXT: stb 3, 0(4)
+; CHECK-PPC64LE-NEXT: srwi 3, 3, 8
+; CHECK-PPC64LE-NEXT: stb 3, 1(4)
; CHECK-PPC64LE-NEXT: blr
;
; CHECK-PPC64-LABEL: store_16_by_i8:
; CHECK-PPC64: # %bb.0: # %entry
-; CHECK-PPC64-NEXT: sthbrx 3, 0, 4
+; CHECK-PPC64-NEXT: srwi 5, 3, 8
+; CHECK-PPC64-NEXT: stb 3, 0(4)
+; CHECK-PPC64-NEXT: stb 5, 1(4)
; CHECK-PPC64-NEXT: blr
entry:
%conv1 = trunc i16 %m to i8
More information about the llvm-commits mailing list