[llvm] 3d259a8 - [PowerPC] Fix LQ-STQ instructions to use correct offset and base
Ahsan Saghir via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 16 08:47:44 PDT 2022
Author: Ahsan Saghir
Date: 2022-06-16T10:47:38-05:00
New Revision: 3d259a82da3ed5cf721452cc97404e3c26527799
URL: https://github.com/llvm/llvm-project/commit/3d259a82da3ed5cf721452cc97404e3c26527799
DIFF: https://github.com/llvm/llvm-project/commit/3d259a82da3ed5cf721452cc97404e3c26527799.diff
LOG: [PowerPC] Fix LQ-STQ instructions to use correct offset and base
This patch fixes the load and store quadword instructions on
PowerPC to use correct offset and base address.
Reviewed By: #powerpc, nemanjai, lkail
Differential Revision: https://reviews.llvm.org/D126807
Added:
llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll
llvm/test/CodeGen/PowerPC/LQ-STQ.ll
Modified:
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 39bb0054823b5..b98d2937e9834 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -114,6 +114,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
+ ImmToIdxMap[PPC::LQ] = PPC::LQX_PSEUDO;
+ ImmToIdxMap[PPC::STQ] = PPC::STQX_PSEUDO;
// VSX
ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX;
@@ -489,6 +491,14 @@ bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) co
LLVM_DEBUG(dbgs() << "TRUE - Memory operand is X-Form.\n");
return true;
}
+
+ // This is a spill/restore of a quadword.
+ if ((Opcode == PPC::RESTORE_QUADWORD) || (Opcode == PPC::SPILL_QUADWORD)) {
+ LLVM_DEBUG(dbgs() << "Memory Operand: " << InstrInfo->getName(Opcode)
+ << " for register " << printReg(Reg, this) << ".\n");
+ LLVM_DEBUG(dbgs() << "TRUE - Memory operand is a quadword.\n");
+ return true;
+ }
}
LLVM_DEBUG(dbgs() << "FALSE - Scavenging is not required.\n");
return false;
@@ -1533,6 +1543,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
Register SRegHi = MF.getRegInfo().createVirtualRegister(RC),
SReg = MF.getRegInfo().createVirtualRegister(RC);
+ unsigned NewOpcode = 0u;
// Insert a set of rA with the full offset value before the ld, st, or add
if (isInt<16>(Offset))
@@ -1561,7 +1572,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OpC != TargetOpcode::INLINEASM_BR) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
- unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ NewOpcode = ImmToIdxMap.find(OpC)->second;
MI.setDesc(TII.get(NewOpcode));
OperandBase = 1;
} else {
@@ -1571,6 +1582,20 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register StackReg = MI.getOperand(FIOperandNum).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
+
+ // Since these are not real X-Form instructions, we must
+ // add the registers and access 0(NewReg) rather than
+ // emitting the X-Form pseudo.
+ if (NewOpcode == PPC::LQX_PSEUDO || NewOpcode == PPC::STQX_PSEUDO) {
+ assert(is64Bit && "Quadword loads/stores only supported in 64-bit mode");
+ Register NewReg = MF.getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
+ BuildMI(MBB, II, dl, TII.get(PPC::ADD8), NewReg)
+ .addReg(SReg, RegState::Kill)
+ .addReg(StackReg);
+ MI.setDesc(TII.get(NewOpcode == PPC::LQX_PSEUDO ? PPC::LQ : PPC::STQ));
+ MI.getOperand(OperandBase + 1).ChangeToRegister(NewReg, false);
+ MI.getOperand(OperandBase).ChangeToImmediate(0);
+ }
}
Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll b/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll
new file mode 100644
index 0000000000000..ca62759c3428d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr10 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mattr=+quadword-atomics -ppc-asm-full-reg-names -o - %s | FileCheck %s
+
+%struct.StructA = type { [16 x i8] }
+
+@s1 = dso_local global i128 324929342, align 16
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @STQ() #0 {
+; CHECK-LABEL: STQ:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lis r0, -16
+; CHECK-NEXT: ori r0, r0, 51488
+; CHECK-NEXT: stdux r1, r1, r0
+; CHECK-NEXT: .cfi_def_cfa_offset 997088
+; CHECK-NEXT: pld r3, s1@PCREL+8(0), 1
+; CHECK-NEXT: std r3, 40(r1)
+; CHECK-NEXT: pld r3, s1@PCREL(0), 1
+; CHECK-NEXT: std r3, 32(r1)
+; CHECK-NEXT: ld r3, 40(r1)
+; CHECK-NEXT: ld r4, 32(r1)
+; CHECK-NEXT: sync
+; CHECK-NEXT: mr r5, r4
+; CHECK-NEXT: mr r4, r3
+; CHECK-NEXT: lis r3, 15
+; CHECK-NEXT: ori r3, r3, 14032
+; CHECK-NEXT: add r3, r3, r1
+; CHECK-NEXT: stq r4, 0(r3)
+; CHECK-NEXT: ld r1, 0(r1)
+; CHECK-NEXT: blr
+entry:
+ %s2 = alloca %struct.StructA, align 16
+ %s3 = alloca %struct.StructA, align 16
+ %arr = alloca [997003 x i8], align 1
+ %tmp = alloca %struct.StructA, align 16
+ call void @llvm.memcpy.p0.p0.i64(ptr align 16 %tmp, ptr align 16 @s1, i64 16, i1 false)
+ %0 = load i128, ptr %tmp, align 16
+ store atomic i128 %0, ptr %s2 seq_cst, align 16
+ ret void
+}
+
+define dso_local void @LQ() #0 {
+; CHECK-LABEL: LQ:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lis r0, -16
+; CHECK-NEXT: ori r0, r0, 51488
+; CHECK-NEXT: stdux r1, r1, r0
+; CHECK-NEXT: .cfi_def_cfa_offset 997088
+; CHECK-NEXT: pld r3, s1@PCREL+8(0), 1
+; CHECK-NEXT: std r3, 40(r1)
+; CHECK-NEXT: pld r3, s1@PCREL(0), 1
+; CHECK-NEXT: std r3, 32(r1)
+; CHECK-NEXT: sync
+; CHECK-NEXT: lis r3, 15
+; CHECK-NEXT: ori r3, r3, 14016
+; CHECK-NEXT: add r3, r3, r1
+; CHECK-NEXT: lq r4, 0(r3)
+; CHECK-NEXT: cmpd cr7, r5, r5
+; CHECK-NEXT: bne- cr7, .+4
+; CHECK-NEXT: isync
+; CHECK-NEXT: ld r1, 0(r1)
+; CHECK-NEXT: blr
+entry:
+ %s2 = alloca %struct.StructA, align 16
+ %s3 = alloca %struct.StructA, align 16
+ %arr = alloca [997003 x i8], align 1
+ %tmp = alloca %struct.StructA, align 16
+ call void @llvm.memcpy.p0.p0.i64(ptr align 16 %tmp, ptr align 16 @s1, i64 16, i1 false)
+ %0 = load i128, ptr %tmp, align 16
+ %1 = load atomic i128, ptr %s3 seq_cst, align 16
+ ret void
+}
+
+; Function Attrs: argmemonly nofree nounwind willreturn
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
+
+attributes #0 = { noinline optnone }
diff --git a/llvm/test/CodeGen/PowerPC/LQ-STQ.ll b/llvm/test/CodeGen/PowerPC/LQ-STQ.ll
new file mode 100644
index 0000000000000..94a432a2a9fd6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/LQ-STQ.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr10 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mattr=+quadword-atomics -ppc-asm-full-reg-names -o - %s | FileCheck %s
+
+%struct.StructA = type { [16 x i8] }
+
+@s1 = dso_local global %struct.StructA { [16 x i8] c"\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A" }, align 16
+
+define dso_local void @test() {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: plxv vs0, s1@PCREL(0), 1
+; CHECK-NEXT: stxv vs0, -48(r1)
+; CHECK-NEXT: ld r3, -40(r1)
+; CHECK-NEXT: ld r4, -48(r1)
+; CHECK-NEXT: sync
+; CHECK-NEXT: mr r5, r4
+; CHECK-NEXT: mr r4, r3
+; CHECK-NEXT: stq r4, -16(r1)
+; CHECK-NEXT: sync
+; CHECK-NEXT: lq r4, -16(r1)
+; CHECK-NEXT: cmpd cr7, r5, r5
+; CHECK-NEXT: bne- cr7, .+4
+; CHECK-NEXT: isync
+; CHECK-NEXT: std r4, -24(r1)
+; CHECK-NEXT: std r5, -32(r1)
+; CHECK-NEXT: blr
+entry:
+ %s2 = alloca %struct.StructA, align 16
+ %s3 = alloca %struct.StructA, align 16
+ %agg.tmp.ensured = alloca %struct.StructA, align 16
+ call void @llvm.memcpy.p0.p0.i64(ptr align 16 %agg.tmp.ensured, ptr align 16 @s1, i64 16, i1 false)
+ %0 = load i128, ptr %agg.tmp.ensured, align 16
+ store atomic i128 %0, ptr %s2 seq_cst, align 16
+ %atomic-load = load atomic i128, ptr %s2 seq_cst, align 16
+ store i128 %atomic-load, ptr %s3, align 16
+ ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
More information about the llvm-commits
mailing list