[llvm] r185065 - [SystemZ] Add conditional store patterns
Richard Sandiford
rsandifo at linux.vnet.ibm.com
Thu Jun 27 02:27:40 PDT 2013
Author: rsandifo
Date: Thu Jun 27 04:27:40 2013
New Revision: 185065
URL: http://llvm.org/viewvc/llvm-project?rev=185065&view=rev
Log:
[SystemZ] Add conditional store patterns
Add pseudo conditional store instructions, so that we use:

    branch foo
    store
foo:

instead of:

    load
    branch foo
    move
foo:
    store

z196 has real 32-bit and 64-bit conditional stores, but we don't use
any z196 instructions yet.
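As a rough illustration, the kind of source pattern these pseudos target
is a store whose value is a select between the previously loaded value
and an alternative (hypothetical C++ example, not part of the patch):

    // Select-of-load feeding a store back to the same address: with this
    // patch the store is branched around instead of being made
    // unconditional after a conditional move.
    void cond_store(int *ptr, int alt, unsigned limit) {
      *ptr = (limit < 42) ? *ptr : alt;
    }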
Added:
llvm/trunk/test/CodeGen/SystemZ/cond-store-01.ll
llvm/trunk/test/CodeGen/SystemZ/cond-store-02.ll
llvm/trunk/test/CodeGen/SystemZ/cond-store-03.ll
llvm/trunk/test/CodeGen/SystemZ/cond-store-04.ll
llvm/trunk/test/CodeGen/SystemZ/cond-store-05.ll
llvm/trunk/test/CodeGen/SystemZ/cond-store-06.ll
Modified:
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td
llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td
llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td
llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
llvm/trunk/lib/Target/SystemZ/SystemZPatterns.td
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=185065&r1=185064&r2=185065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Thu Jun 27 04:27:40 2013
@@ -1696,6 +1696,59 @@ SystemZTargetLowering::emitSelect(Machin
return JoinMBB;
}
+// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
+// StoreOpcode is the store to use and Invert says whether the store should
+// happen when the condition is false rather than true.
+MachineBasicBlock *
+SystemZTargetLowering::emitCondStore(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned StoreOpcode, bool Invert) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+
+ MachineOperand Base = MI->getOperand(0);
+ int64_t Disp = MI->getOperand(1).getImm();
+ unsigned IndexReg = MI->getOperand(2).getReg();
+ unsigned SrcReg = MI->getOperand(3).getReg();
+ unsigned CCMask = MI->getOperand(4).getImm();
+ DebugLoc DL = MI->getDebugLoc();
+
+ StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
+
+ // Get the condition needed to branch around the store.
+ if (!Invert)
+ CCMask = CCMask ^ SystemZ::CCMASK_ANY;
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // BRC CCMask, JoinMBB
+ // # fallthrough to FalseMBB
+ //
+ // The original DAG glues comparisons to their uses, both to ensure
+ // that no CC-clobbering instructions are inserted between them, and
+ // to ensure that comparison results are not reused. This means that
+ // this CondStore is the sole user of any preceding comparison instruction
+ // and that we can try to use a fused compare and branch instead.
+ MBB = StartMBB;
+ if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB))
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB);
+ MBB->addSuccessor(JoinMBB);
+ MBB->addSuccessor(FalseMBB);
+
+ // FalseMBB:
+ // store %SrcReg, %Disp(%Index,%Base)
+ // # fallthrough to JoinMBB
+ MBB = FalseMBB;
+ BuildMI(MBB, DL, TII->get(StoreOpcode))
+ .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
+ MBB->addSuccessor(JoinMBB);
+
+ MI->eraseFromParent();
+ return JoinMBB;
+}
+
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
@@ -2100,6 +2153,43 @@ EmitInstrWithCustomInserter(MachineInstr
case SystemZ::SelectF128:
return emitSelect(MI, MBB);
+ case SystemZ::CondStore8_32:
+ return emitCondStore(MI, MBB, SystemZ::STC32, false);
+ case SystemZ::CondStore8_32Inv:
+ return emitCondStore(MI, MBB, SystemZ::STC32, true);
+ case SystemZ::CondStore16_32:
+ return emitCondStore(MI, MBB, SystemZ::STH32, false);
+ case SystemZ::CondStore16_32Inv:
+ return emitCondStore(MI, MBB, SystemZ::STH32, true);
+ case SystemZ::CondStore32_32:
+ return emitCondStore(MI, MBB, SystemZ::ST32, false);
+ case SystemZ::CondStore32_32Inv:
+ return emitCondStore(MI, MBB, SystemZ::ST32, true);
+ case SystemZ::CondStore8:
+ return emitCondStore(MI, MBB, SystemZ::STC, false);
+ case SystemZ::CondStore8Inv:
+ return emitCondStore(MI, MBB, SystemZ::STC, true);
+ case SystemZ::CondStore16:
+ return emitCondStore(MI, MBB, SystemZ::STH, false);
+ case SystemZ::CondStore16Inv:
+ return emitCondStore(MI, MBB, SystemZ::STH, true);
+ case SystemZ::CondStore32:
+ return emitCondStore(MI, MBB, SystemZ::ST, false);
+ case SystemZ::CondStore32Inv:
+ return emitCondStore(MI, MBB, SystemZ::ST, true);
+ case SystemZ::CondStore64:
+ return emitCondStore(MI, MBB, SystemZ::STG, false);
+ case SystemZ::CondStore64Inv:
+ return emitCondStore(MI, MBB, SystemZ::STG, true);
+ case SystemZ::CondStoreF32:
+ return emitCondStore(MI, MBB, SystemZ::STE, false);
+ case SystemZ::CondStoreF32Inv:
+ return emitCondStore(MI, MBB, SystemZ::STE, true);
+ case SystemZ::CondStoreF64:
+ return emitCondStore(MI, MBB, SystemZ::STD, false);
+ case SystemZ::CondStoreF64Inv:
+ return emitCondStore(MI, MBB, SystemZ::STD, true);
+
case SystemZ::AEXT128_64:
return emitExt128(MI, MBB, false, SystemZ::subreg_low);
case SystemZ::ZEXT128_32:
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h?rev=185065&r1=185064&r2=185065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h Thu Jun 27 04:27:40 2013
@@ -203,6 +203,9 @@ private:
// Implement EmitInstrWithCustomInserter for individual operation types.
MachineBasicBlock *emitSelect(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitCondStore(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned StoreOpcode, bool Invert) const;
MachineBasicBlock *emitExt128(MachineInstr *MI,
MachineBasicBlock *MBB,
bool ClearEven, unsigned SubReg) const;
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td?rev=185065&r1=185064&r2=185065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrFP.td Thu Jun 27 04:27:40 2013
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Control-flow instructions
+// Select instructions
//===----------------------------------------------------------------------===//
// C's ?: operator for floating-point operands.
@@ -16,6 +16,11 @@ def SelectF32 : SelectWrapper<FP32>;
def SelectF64 : SelectWrapper<FP64>;
def SelectF128 : SelectWrapper<FP128>;
+defm CondStoreF32 : CondStores<FP32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td?rev=185065&r1=185064&r2=185065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td Thu Jun 27 04:27:40 2013
@@ -956,6 +956,19 @@ class SelectWrapper<RegisterOperand cls>
let Uses = [CC];
}
+// Stores $new to $addr if $cc is true ("" case) or false (Inv case).
+multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
+ SDPatternOperator load, AddressingMode mode> {
+ let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in {
+ def "" : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc),
+ [(store (z_select_ccmask cls:$new, (load mode:$addr),
+ imm:$cc), mode:$addr)]>;
+ def Inv : Pseudo<(outs), (ins mode:$addr, cls:$new, i8imm:$cc),
+ [(store (z_select_ccmask (load mode:$addr), cls:$new,
+ imm:$cc), mode:$addr)]>;
+ }
+}
+
// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND
// describe the second (non-memory) operand.
class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
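Each CondStores expansion above yields two pseudos matching the same
store-of-select DAG, differing only in whether the reloaded value sits
in the true or the false position of the select. In source terms the two
forms correspond to (hypothetical C++ illustration):

    // Matched by the "" pseudo: loaded value in the true position,
    // so the store of the alternative is skipped when cc holds.
    void keep_if(int *p, int alt, bool cc) { *p = cc ? *p : alt; }

    // Matched by the Inv pseudo: loaded value in the false position,
    // so the store happens when cc holds.
    void store_if(int *p, int alt, bool cc) { *p = cc ? alt : *p; }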
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td?rev=185065&r1=185064&r2=185065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td Thu Jun 27 04:27:40 2013
@@ -163,9 +163,29 @@ defm AsmJE : IntCondExtendedMnemonic<8
defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">;
defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">;
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
def Select32 : SelectWrapper<GR32>;
def Select64 : SelectWrapper<GR64>;
+defm CondStore8_32 : CondStores<GR32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm CondStore16_32 : CondStores<GR32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm CondStore32_32 : CondStores<GR32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
+defm CondStore8 : CondStores<GR64, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm CondStore16 : CondStores<GR64, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm CondStore32 : CondStores<GR64, nonvolatile_truncstorei32,
+ nonvolatile_anyextloadi32, bdxaddr20only>;
+defm CondStore64 : CondStores<GR64, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
//===----------------------------------------------------------------------===//
// Call instructions
//===----------------------------------------------------------------------===//
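The sub-word variants pair a truncating store with an any-extending
reload: since the selected value is stored back at its original width,
it does not matter how the intermediate code extended the loaded byte or
halfword. A hypothetical byte-sized source pattern of the kind covered
by CondStore8_32/CondStore8:

    // The reload of *p may be sign- or zero-extended to 32 or 64 bits;
    // the truncating store writes back the same 8 bits either way.
    void cond_store_byte(unsigned char *p, unsigned char alt,
                         unsigned limit) {
      *p = (limit < 42) ? *p : alt;
    }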
Modified: llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZOperators.td?rev=185065&r1=185064&r2=185065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZOperators.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZOperators.td Thu Jun 27 04:27:40 2013
@@ -120,6 +120,20 @@ def zext32 : PatFrag<(ops node:$src), (z
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
+// Extending loads in which the extension type doesn't matter.
+def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
+}]>;
+def anyextloadi8 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def anyextloadi16 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def anyextloadi32 : PatFrag<(ops node:$ptr), (anyextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
// Aligned loads.
class AlignedLoad<SDPatternOperator load>
: PatFrag<(ops node:$addr), (load node:$addr), [{
@@ -149,7 +163,10 @@ class NonvolatileLoad<SDPatternOperator
LoadSDNode *Load = cast<LoadSDNode>(N);
return !Load->isVolatile();
}]>;
-def nonvolatile_load : NonvolatileLoad<load>;
+def nonvolatile_load : NonvolatileLoad<load>;
+def nonvolatile_anyextloadi8 : NonvolatileLoad<anyextloadi8>;
+def nonvolatile_anyextloadi16 : NonvolatileLoad<anyextloadi16>;
+def nonvolatile_anyextloadi32 : NonvolatileLoad<anyextloadi32>;
// Non-volatile stores.
class NonvolatileStore<SDPatternOperator store>
@@ -157,7 +174,10 @@ class NonvolatileStore<SDPatternOperator
StoreSDNode *Store = cast<StoreSDNode>(N);
return !Store->isVolatile();
}]>;
-def nonvolatile_store : NonvolatileStore<store>;
+def nonvolatile_store : NonvolatileStore<store>;
+def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
+def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
+def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
// Insertions.
def inserti8 : PatFrag<(ops node:$src1, node:$src2),
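The anyextload* fragments accept any extension type because every
matching context truncates the value back to its memory width before
storing, which makes the extended bits dead. A small standalone
demonstration of why either extension kind is safe (illustrative only,
not part of the patch):

    #include <cassert>

    int main() {
      unsigned char b = 0x80;
      int sext_val = (signed char)b;  // result of a sign-extending load
      int zext_val = b;               // result of a zero-extending load
      // A truncating byte store writes the same bits in both cases.
      assert((unsigned char)sext_val == (unsigned char)zext_val);
      return 0;
    }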
Modified: llvm/trunk/lib/Target/SystemZ/SystemZPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZPatterns.td?rev=185065&r1=185064&r2=185065&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZPatterns.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZPatterns.td Thu Jun 27 04:27:40 2013
@@ -50,12 +50,8 @@ class RMWI<SDPatternOperator load, SDPat
// memory location. IMM is the type of the second operand.
multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
Instruction insn> {
- def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>;
- def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>;
- def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>;
- def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>;
- def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>;
- def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>;
+ def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm32, insn>;
+ def : RMWI<anyextloadi8, operator, truncstorei8, mode, imm64, insn>;
}
// Record that INSN performs insertion TYPE into a register of class CLS.
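With anyextloadi8 available, RMWIByte needs only two patterns (one per
immediate type) instead of one per extension kind. The read-modify-write
shape being matched is along these lines (hypothetical example; the
mention of a byte immediate instruction such as OI as the target is an
assumption, not stated in the patch):

    // Byte-sized read-modify-write on memory; after this change it is
    // matched regardless of how the intermediate load was extended.
    void set_flag(unsigned char *p) { *p |= 0x01; }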
Added: llvm/trunk/test/CodeGen/SystemZ/cond-store-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-store-01.ll?rev=185065&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-store-01.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-store-01.ll Thu Jun 27 04:27:40 2013
@@ -0,0 +1,396 @@
+; Test 8-bit conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo(i8 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i8 *%ptr, i8 %alt, i32 %limit) {
+; CHECK: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i8 *%ptr, i8 %alt, i32 %limit) {
+; CHECK: f2:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %alt, i8 %orig
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 32 bits, with the
+; loaded value first.
+define void @f3(i8 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %ext = sext i8 %orig to i32
+ %res = select i1 %cond, i32 %ext, i32 %alt
+ %trunc = trunc i32 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f4(i8 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f4:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %ext = sext i8 %orig to i32
+ %res = select i1 %cond, i32 %alt, i32 %ext
+ %trunc = trunc i32 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 32 bits, with the
+; loaded value first.
+define void @f5(i8 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %ext = zext i8 %orig to i32
+ %res = select i1 %cond, i32 %ext, i32 %alt
+ %trunc = trunc i32 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f6(i8 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f6:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %ext = zext i8 %orig to i32
+ %res = select i1 %cond, i32 %alt, i32 %ext
+ %trunc = trunc i32 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 64 bits, with the
+; loaded value first.
+define void @f7(i8 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %ext = sext i8 %orig to i64
+ %res = select i1 %cond, i64 %ext, i64 %alt
+ %trunc = trunc i64 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f8(i8 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f8:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %ext = sext i8 %orig to i64
+ %res = select i1 %cond, i64 %alt, i64 %ext
+ %trunc = trunc i64 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 64 bits, with the
+; loaded value first.
+define void @f9(i8 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %ext = zext i8 %orig to i64
+ %res = select i1 %cond, i64 %ext, i64 %alt
+ %trunc = trunc i64 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f10(i8 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f10:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %ext = zext i8 %orig to i64
+ %res = select i1 %cond, i64 %alt, i64 %ext
+ %trunc = trunc i64 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; Check the high end of the STC range.
+define void @f11(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK: f11:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stc %r3, 4095(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i8 *%base, i64 4095
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; Check the next byte up, which should use STCY instead of STC.
+define void @f12(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK: f12:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stcy %r3, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i8 *%base, i64 4096
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; Check the high end of the STCY range.
+define void @f13(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK: f13:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stcy %r3, 524287(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i8 *%base, i64 524287
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f14(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK: f14:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i8 *%base, i64 524288
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; Check the low end of the STCY range.
+define void @f15(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK: f15:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stcy %r3, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i8 *%base, i64 -524288
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f16(i8 *%base, i8 %alt, i32 %limit) {
+; CHECK: f16:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524289
+; CHECK: stc %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i8 *%base, i64 -524289
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; Check that STCY allows an index.
+define void @f17(i64 %base, i64 %index, i8 %alt, i32 %limit) {
+; CHECK: f17:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stcy %r4, 4096(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %add1 = add i64 %base, %index
+ %add2 = add i64 %add1, 4096
+ %ptr = inttoptr i64 %add2 to i8 *
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f18(i8 *%ptr, i8 %alt, i32 %limit) {
+; CHECK: f18:
+; CHECK: lb {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: stc {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load volatile i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; ...likewise stores. In this case we should have a conditional load into %r3.
+define void @f19(i8 *%ptr, i8 %alt, i32 %limit) {
+; CHECK: f19:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: lb %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store volatile i8 %res, i8 *%ptr
+ ret void
+}
+
+; Check that atomic loads are not matched. The transformation is OK for
+; the "unordered" case tested here, but since we don't try to handle atomic
+; operations at all in this context, it seems better to assert that than
+; to restrict the test to a stronger ordering.
+define void @f20(i8 *%ptr, i8 %alt, i32 %limit) {
+; FIXME: should use a normal load instead of CS.
+; CHECK: f20:
+; CHECK: cs {{%r[0-9]+}},
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: stc {{%r[0-9]+}},
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load atomic i8 *%ptr unordered, align 1
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; ...likewise stores.
+define void @f21(i8 *%ptr, i8 %alt, i32 %limit) {
+; FIXME: should use a normal store instead of CS.
+; CHECK: f21:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: lb %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: cs {{%r[0-9]+}},
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store atomic i8 %res, i8 *%ptr unordered, align 1
+ ret void
+}
+
+; Try a frame index base.
+define void @f22(i8 %alt, i32 %limit) {
+; CHECK: f22:
+; CHECK: brasl %r14, foo at PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: stc {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo at PLT
+; CHECK: br %r14
+ %ptr = alloca i8
+ call void @foo(i8 *%ptr)
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i8 *%ptr
+ %res = select i1 %cond, i8 %orig, i8 %alt
+ store i8 %res, i8 *%ptr
+ call void @foo(i8 *%ptr)
+ ret void
+}
Added: llvm/trunk/test/CodeGen/SystemZ/cond-store-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-store-02.ll?rev=185065&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-store-02.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-store-02.ll Thu Jun 27 04:27:40 2013
@@ -0,0 +1,396 @@
+; Test 16-bit conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo(i16 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
+; CHECK: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
+; CHECK: f2:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %alt, i16 %orig
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 32 bits, with the
+; loaded value first.
+define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %ext = sext i16 %orig to i32
+ %res = select i1 %cond, i32 %ext, i32 %alt
+ %trunc = trunc i32 %res to i16
+ store i16 %trunc, i16 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f4:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %ext = sext i16 %orig to i32
+ %res = select i1 %cond, i32 %alt, i32 %ext
+ %trunc = trunc i32 %res to i16
+ store i16 %trunc, i16 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 32 bits, with the
+; loaded value first.
+define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %ext = zext i16 %orig to i32
+ %res = select i1 %cond, i32 %ext, i32 %alt
+ %trunc = trunc i32 %res to i16
+ store i16 %trunc, i16 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f6:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %ext = zext i16 %orig to i32
+ %res = select i1 %cond, i32 %alt, i32 %ext
+ %trunc = trunc i32 %res to i16
+ store i16 %trunc, i16 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 64 bits, with the
+; loaded value first.
+define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %ext = sext i16 %orig to i64
+ %res = select i1 %cond, i64 %ext, i64 %alt
+ %trunc = trunc i64 %res to i16
+ store i16 %trunc, i16 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f8:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %ext = sext i16 %orig to i64
+ %res = select i1 %cond, i64 %alt, i64 %ext
+ %trunc = trunc i64 %res to i16
+ store i16 %trunc, i16 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 64 bits, with the
+; loaded value first.
+define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %ext = zext i16 %orig to i64
+ %res = select i1 %cond, i64 %ext, i64 %alt
+ %trunc = trunc i64 %res to i16
+ store i16 %trunc, i16 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f10:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %ext = zext i16 %orig to i64
+ %res = select i1 %cond, i64 %alt, i64 %ext
+ %trunc = trunc i64 %res to i16
+ store i16 %trunc, i16 *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STH range.
+define void @f11(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK: f11:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sth %r3, 4094(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%base, i64 2047
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; Check the next halfword up, which should use STHY instead of STH.
+define void @f12(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK: f12:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sthy %r3, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%base, i64 2048
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STHY range.
+define void @f13(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK: f13:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sthy %r3, 524286(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%base, i64 262143
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f14(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK: f14:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%base, i64 262144
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; Check the low end of the STHY range.
+define void @f15(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK: f15:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sthy %r3, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%base, i64 -262144
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f16(i16 *%base, i16 %alt, i32 %limit) {
+; CHECK: f16:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524290
+; CHECK: sth %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i16 *%base, i64 -262145
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; Check that STHY allows an index.
+define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
+; CHECK: f17:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sthy %r4, 4096(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %add1 = add i64 %base, %index
+ %add2 = add i64 %add1, 4096
+ %ptr = inttoptr i64 %add2 to i16 *
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
+; CHECK: f18:
+; CHECK: lh {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: sth {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load volatile i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; ...likewise stores. In this case we should have a conditional load into %r3.
+define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
+; CHECK: f19:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: lh %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: sth %r3, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store volatile i16 %res, i16 *%ptr
+ ret void
+}
+
+; Check that atomic loads are not matched. The transformation is OK for
+; the "unordered" case tested here, but since we don't try to handle atomic
+; operations at all in this context, it seems better to assert that than
+; to restrict the test to a stronger ordering.
+define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
+; FIXME: should use a normal load instead of CS.
+; CHECK: f20:
+; CHECK: cs {{%r[0-9]+}},
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: sth {{%r[0-9]+}},
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load atomic i16 *%ptr unordered, align 2
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ ret void
+}
+
+; ...likewise stores.
+define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
+; FIXME: should use a normal store instead of CS.
+; CHECK: f21:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: lh %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: cs {{%r[0-9]+}},
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store atomic i16 %res, i16 *%ptr unordered, align 2
+ ret void
+}
+
+; Try a frame index base.
+define void @f22(i16 %alt, i32 %limit) {
+; CHECK: f22:
+; CHECK: brasl %r14, foo at PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: sth {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo at PLT
+; CHECK: br %r14
+ %ptr = alloca i16
+ call void @foo(i16 *%ptr)
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i16 *%ptr
+ %res = select i1 %cond, i16 %orig, i16 %alt
+ store i16 %res, i16 *%ptr
+ call void @foo(i16 *%ptr)
+ ret void
+}
Added: llvm/trunk/test/CodeGen/SystemZ/cond-store-03.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-store-03.ll?rev=185065&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-store-03.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-store-03.ll Thu Jun 27 04:27:40 2013
@@ -0,0 +1,322 @@
+; Test 32-bit conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f2:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %alt, i32 %orig
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly sign-extended to 64 bits, with the
+; loaded value first.
+define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %ext = sext i32 %orig to i64
+ %res = select i1 %cond, i64 %ext, i64 %alt
+ %trunc = trunc i64 %res to i32
+ store i32 %trunc, i32 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f4:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %ext = sext i32 %orig to i64
+ %res = select i1 %cond, i64 %alt, i64 %ext
+ %trunc = trunc i64 %res to i32
+ store i32 %trunc, i32 *%ptr
+ ret void
+}
+
+; Test cases where the value is explicitly zero-extended to 32 bits, with the
+; loaded value first.
+define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %ext = zext i32 %orig to i64
+ %res = select i1 %cond, i64 %ext, i64 %alt
+ %trunc = trunc i64 %res to i32
+ store i32 %trunc, i32 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f6:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %ext = zext i32 %orig to i64
+ %res = select i1 %cond, i64 %alt, i64 %ext
+ %trunc = trunc i64 %res to i32
+ store i32 %trunc, i32 *%ptr
+ ret void
+}
+
+; Check the high end of the aligned ST range.
+define void @f7(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: st %r3, 4092(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%base, i64 1023
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the next word up, which should use STY instead of ST.
+define void @f8(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK: f8:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sty %r3, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%base, i64 1024
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STY range.
+define void @f9(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sty %r3, 524284(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%base, i64 131071
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f10(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK: f10:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%base, i64 131072
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the low end of the STY range.
+define void @f11(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK: f11:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sty %r3, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%base, i64 -131072
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f12(i32 *%base, i32 %alt, i32 %limit) {
+; CHECK: f12:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524292
+; CHECK: st %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i32 *%base, i64 -131073
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check that STY allows an index.
+define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) {
+; CHECK: f13:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: sty %r4, 4096(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %add1 = add i64 %base, %index
+ %add2 = add i64 %add1, 4096
+ %ptr = inttoptr i64 %add2 to i32 *
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f14:
+; CHECK: l {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: st {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load volatile i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; ...likewise stores. In this case we should have a conditional load into %r3.
+define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
+; CHECK: f15:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: l %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: st %r3, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store volatile i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check that atomic loads are not matched. The transformation is OK for
+; the "unordered" case tested here, but since we don't try to handle atomic
+; operations at all in this context, it seems better to assert that than
+; to restrict the test to a stronger ordering.
+define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
+; FIXME: should use a normal load instead of CS.
+; CHECK: f16:
+; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: st {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load atomic i32 *%ptr unordered, align 4
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; ...likewise stores.
+define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
+; FIXME: should use a normal store instead of CS.
+; CHECK: f17:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: l %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store atomic i32 %res, i32 *%ptr unordered, align 4
+ ret void
+}
+
+; Try a frame index base.
+define void @f18(i32 %alt, i32 %limit) {
+; CHECK: f18:
+; CHECK: brasl %r14, foo at PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: st {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo at PLT
+; CHECK: br %r14
+ %ptr = alloca i32
+ call void @foo(i32 *%ptr)
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ call void @foo(i32 *%ptr)
+ ret void
+}
Added: llvm/trunk/test/CodeGen/SystemZ/cond-store-04.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-store-04.ll?rev=185065&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-store-04.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-store-04.ll Thu Jun 27 04:27:40 2013
@@ -0,0 +1,214 @@
+; Test 64-bit conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo(i64 *)
+
+; Test with the loaded value first.
+define void @f1(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f2:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %alt, i64 %orig
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STG range.
+define void @f3(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r3, 524280(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%base, i64 65535
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f4(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK: f4:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: stg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%base, i64 65536
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; Check the low end of the STG range.
+define void @f5(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r3, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%base, i64 -65536
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i64 *%base, i64 %alt, i32 %limit) {
+; CHECK: f6:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524296
+; CHECK: stg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr i64 *%base, i64 -65537
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; Check that STG allows an index.
+define void @f7(i64 %base, i64 %index, i64 %alt, i32 %limit) {
+; CHECK: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stg %r4, 524287(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %add1 = add i64 %base, %index
+ %add2 = add i64 %add1, 524287
+ %ptr = inttoptr i64 %add2 to i64 *
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f8(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f8:
+; CHECK: lg {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: stg {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load volatile i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; ...likewise stores. In this case we should have a conditional load into %r3.
+define void @f9(i64 *%ptr, i64 %alt, i32 %limit) {
+; CHECK: f9:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: lg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: stg %r3, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store volatile i64 %res, i64 *%ptr
+ ret void
+}
+
+; Check that atomic loads are not matched. The transformation is OK for
+; the "unordered" case tested here, but since we don't try to handle atomic
+; operations at all in this context, it seems better to assert that than
+; to restrict the test to a stronger ordering.
+define void @f10(i64 *%ptr, i64 %alt, i32 %limit) {
+; FIXME: should use a normal load instead of CSG.
+; CHECK: f10:
+; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: stg {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load atomic i64 *%ptr unordered, align 8
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ ret void
+}
+
+; ...likewise stores.
+define void @f11(i64 *%ptr, i64 %alt, i32 %limit) {
+; FIXME: should use a normal store instead of CSG.
+; CHECK: f11:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: lg %r3, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store atomic i64 %res, i64 *%ptr unordered, align 8
+ ret void
+}
+
+; Try a frame index base.
+define void @f12(i64 %alt, i32 %limit) {
+; CHECK: f12:
+; CHECK: brasl %r14, foo at PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: stg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo at PLT
+; CHECK: br %r14
+ %ptr = alloca i64
+ call void @foo(i64 *%ptr)
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i64 *%ptr
+ %res = select i1 %cond, i64 %orig, i64 %alt
+ store i64 %res, i64 *%ptr
+ call void @foo(i64 *%ptr)
+ ret void
+}
Added: llvm/trunk/test/CodeGen/SystemZ/cond-store-05.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-store-05.ll?rev=185065&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-store-05.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-store-05.ll Thu Jun 27 04:27:40 2013
@@ -0,0 +1,213 @@
+; Test f32 conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo(float *)
+
+; Test with the loaded value first.
+define void @f1(float *%ptr, float %alt, i32 %limit) {
+; CHECK: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: ste %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f2(float *%ptr, float %alt, i32 %limit) {
+; CHECK: f2:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: ste %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %alt, float %orig
+ store float %res, float *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STE range.
+define void @f3(float *%base, float %alt, i32 %limit) {
+; CHECK: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: ste %f0, 4092(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr float *%base, i64 1023
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; Check the next word up, which should use STEY instead of STE.
+define void @f4(float *%base, float %alt, i32 %limit) {
+; CHECK: f4:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stey %f0, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr float *%base, i64 1024
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STEY range.
+define void @f5(float *%base, float %alt, i32 %limit) {
+; CHECK: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stey %f0, 524284(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr float *%base, i64 131071
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(float *%base, float %alt, i32 %limit) {
+; CHECK: f6:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: ste %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr float *%base, i64 131072
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; Check the low end of the STEY range.
+define void @f7(float *%base, float %alt, i32 %limit) {
+; CHECK: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stey %f0, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr float *%base, i64 -131072
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(float *%base, float %alt, i32 %limit) {
+; CHECK: f8:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524292
+; CHECK: ste %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr float *%base, i64 -131073
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; Check that STEY allows an index.
+define void @f9(i64 %base, i64 %index, float %alt, i32 %limit) {
+; CHECK: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stey %f0, 4096(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %add1 = add i64 %base, %index
+ %add2 = add i64 %add1, 4096
+ %ptr = inttoptr i64 %add2 to float *
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; Check that volatile loads are not matched.
+define void @f10(float *%ptr, float %alt, i32 %limit) {
+; CHECK: f10:
+; CHECK: le {{%f[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: ste {{%f[0-5]}}, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load volatile float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ ret void
+}
+
+; ...likewise stores. In this case we should have a conditional load into %f0.
+define void @f11(float *%ptr, float %alt, i32 %limit) {
+; CHECK: f11:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: le %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: ste %f0, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store volatile float %res, float *%ptr
+ ret void
+}
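(The volatile cases pin down the required semantics: a volatile access may
be neither speculated nor duplicated.  So in f10 the load executes
unconditionally before the branch, and in f11 the store executes exactly
once, with the load becoming the conditional operation instead.)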
+
+; Try a frame index base.
+define void @f12(float %alt, i32 %limit) {
+; CHECK: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: ste {{%f[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %ptr = alloca float
+ call void @foo(float *%ptr)
+ %cond = icmp ult i32 %limit, 42
+ %orig = load float *%ptr
+ %res = select i1 %cond, float %orig, float %alt
+ store float %res, float *%ptr
+ call void @foo(float *%ptr)
+ ret void
+}
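The same select idiom drives the integer tests in cond-store-01.ll through
cond-store-04.ll.  As a minimal standalone sketch, illustrative rather than
taken from the commit, the following input should reach the same
conditional-store lowering under llc -mtriple=s390x-linux-gnu:

; Keep *%ptr if %limit < 42, otherwise overwrite it with %alt.
define void @sketch(i32 *%ptr, i32 %alt, i32 %limit) {
  %cond = icmp ult i32 %limit, 42
  %orig = load i32 *%ptr
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr
  ret void
}

The expected shape is a compare followed by a branch around a single ST,
with no unconditional load, mirroring the f1 tests.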
Added: llvm/trunk/test/CodeGen/SystemZ/cond-store-06.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-store-06.ll?rev=185065&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-store-06.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-store-06.ll Thu Jun 27 04:27:40 2013
@@ -0,0 +1,213 @@
+; Test f64 conditional stores that are presented as selects.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @foo(double *)
+
+; Test with the loaded value first.
+define void @f1(double *%ptr, double %alt, i32 %limit) {
+; CHECK: f1:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: std %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
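(In f1 the value stored on the true path is the value already in memory,
so that path needs no code at all: the load is folded away and %alt is
stored only if the condition fails.  The CHECK-NOT: %r2 lines on either
side of the branch confirm that no stray access through the pointer
register survives outside the guarded store.)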
+
+; ...and with the loaded value second.
+define void @f2(double *%ptr, double %alt, i32 %limit) {
+; CHECK: f2:
+; CHECK-NOT: %r2
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: std %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %alt, double %orig
+ store double %res, double *%ptr
+ ret void
+}
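(f2 swaps the select operands, so the store must happen when the condition
holds rather than when it fails; the guarding branch is inverted
accordingly, jnl instead of jl.)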
+
+; Check the high end of the aligned STD range.
+define void @f3(double *%base, double %alt, i32 %limit) {
+; CHECK: f3:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: std %f0, 4088(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr double *%base, i64 511
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
+
+; Check the next doubleword up, which should use STDY instead of STD.
+define void @f4(double *%base, double %alt, i32 %limit) {
+; CHECK: f4:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stdy %f0, 4096(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr double *%base, i64 512
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STDY range.
+define void @f5(double *%base, double %alt, i32 %limit) {
+; CHECK: f5:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stdy %f0, 524280(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr double *%base, i64 65535
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(double *%base, double %alt, i32 %limit) {
+; CHECK: f6:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, 524288
+; CHECK: std %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr double *%base, i64 65536
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
+
+; Check the low end of the STDY range.
+define void @f7(double *%base, double %alt, i32 %limit) {
+; CHECK: f7:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stdy %f0, -524288(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr double *%base, i64 -65536
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(double *%base, double %alt, i32 %limit) {
+; CHECK: f8:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: agfi %r2, -524296
+; CHECK: std %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %ptr = getelementptr double *%base, i64 -65537
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
+
+; Check that STDY allows an index.
+define void @f9(i64 %base, i64 %index, double %alt, i32 %limit) {
+; CHECK: f9:
+; CHECK-NOT: %r2
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r2
+; CHECK: stdy %f0, 524287(%r3,%r2)
+; CHECK: [[LABEL]]:
+; CHECK: br %r14
+ %add1 = add i64 %base, %index
+ %add2 = add i64 %add1, 524287
+ %ptr = inttoptr i64 %add2 to double *
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
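(The displacement here is 524287, the true top of the 20-bit signed range.
Unlike the aligned bounds probed in f5 and f7, an RXY-format displacement
need not be doubleword aligned, so the indexed address can use it
directly.)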
+
+; Check that volatile loads are not matched.
+define void @f10(double *%ptr, double %alt, i32 %limit) {
+; CHECK: f10:
+; CHECK: ld {{%f[0-5]}}, 0(%r2)
+; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
+; CHECK: [[LABEL]]:
+; CHECK: std {{%f[0-5]}}, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load volatile double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ ret void
+}
+
+; ...likewise stores. In this case we should have a conditional load into %f0.
+define void @f11(double *%ptr, double %alt, i32 %limit) {
+; CHECK: f11:
+; CHECK: jnl [[LABEL:[^ ]*]]
+; CHECK: ld %f0, 0(%r2)
+; CHECK: [[LABEL]]:
+; CHECK: std %f0, 0(%r2)
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store volatile double %res, double *%ptr
+ ret void
+}
+
+; Try a frame index base.
+define void @f12(double %alt, i32 %limit) {
+; CHECK: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-NOT: %r15
+; CHECK: jl [[LABEL:[^ ]*]]
+; CHECK-NOT: %r15
+; CHECK: std {{%f[0-9]+}}, {{[0-9]+}}(%r15)
+; CHECK: [[LABEL]]:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %ptr = alloca double
+ call void @foo(double *%ptr)
+ %cond = icmp ult i32 %limit, 42
+ %orig = load double *%ptr
+ %res = select i1 %cond, double %orig, double %alt
+ store double %res, double *%ptr
+ call void @foo(double *%ptr)
+ ret void
+}