[llvm] r276787 - [Hexagon] Bitwise operations for insert/extract word not simplified
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 26 11:30:12 PDT 2016
Author: kparzysz
Date: Tue Jul 26 13:30:11 2016
New Revision: 276787
URL: http://llvm.org/viewvc/llvm-project?rev=276787&view=rev
Log:
[Hexagon] Bitwise operations for insert/extract word not simplified
Change the bit simplifier to generate REG_SEQUENCE instructions in
addition to COPY, which will handle cases of word insert/extract.
Added:
llvm/trunk/test/CodeGen/Hexagon/bit-gen-rseq.ll
Modified:
llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp
llvm/trunk/lib/Target/Hexagon/HexagonBitTracker.cpp
llvm/trunk/test/CodeGen/Hexagon/struct_args.ll
Modified: llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp?rev=276787&r1=276786&r2=276787&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonBitSimplify.cpp Tue Jul 26 13:30:11 2016
@@ -1249,6 +1249,8 @@ bool RedundantInstrElimination::usedBits
bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
const RegisterSet&) {
+ if (!BT.reached(&B))
+ return false;
bool Changed = false;
for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
@@ -1295,7 +1297,15 @@ bool RedundantInstrElimination::processB
BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
.addReg(RS.Reg, 0, RS.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
- BT.put(BitTracker::RegisterRef(NewR), SC);
+ // Do not update the bit tracker. This pass can create copies between
+ // registers that don't have the exact same values. Updating the
+ // tracker here may be tricky. E.g.
+ // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal
+ //
+ // vreg3 = copy vreg2 ; <- inserted
+ // ... = vreg3 ; <- replaced from vreg2
+ // Indirectly, we can create a "copy" between vreg1 and vreg2 even
+ // though their exact values do not match.
Changed = true;
break;
}
@@ -1317,8 +1327,8 @@ namespace {
MachineRegisterInfo &mri)
: Transformation(true), HII(hii), MRI(mri), BT(bt) {}
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ static bool isTfrConst(const MachineInstr &MI);
private:
- bool isTfrConst(const MachineInstr &MI) const;
bool isConst(unsigned R, int64_t &V) const;
unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
@@ -1346,7 +1356,7 @@ bool ConstGeneration::isConst(unsigned R
return true;
}
-bool ConstGeneration::isTfrConst(const MachineInstr &MI) const {
+bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::A2_combineii:
@@ -1413,6 +1423,8 @@ unsigned ConstGeneration::genTfrConst(co
bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+ if (!BT.reached(&B))
+ return false;
bool Changed = false;
RegisterSet Defs;
@@ -1426,14 +1438,16 @@ bool ConstGeneration::processBlock(Machi
unsigned DR = Defs.find_first();
if (!TargetRegisterInfo::isVirtualRegister(DR))
continue;
- int64_t C;
- if (isConst(DR, C)) {
+ uint64_t U;
+ const BitTracker::RegisterCell &DRC = BT.lookup(DR);
+ if (HBS::getConst(DRC, 0, DRC.width(), U)) {
+ int64_t C = U;
DebugLoc DL = I->getDebugLoc();
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
if (ImmReg) {
HBS::replaceReg(DR, ImmReg, MRI);
- BT.put(ImmReg, BT.lookup(DR));
+ BT.put(ImmReg, DRC);
Changed = true;
}
}
@@ -1467,6 +1481,7 @@ namespace {
const HexagonInstrInfo &HII;
MachineRegisterInfo &MRI;
BitTracker &BT;
+ RegisterSet Forbidden;
};
class CopyPropagation : public Transformation {
@@ -1491,17 +1506,20 @@ bool CopyGeneration::findMatch(const Bit
if (!BT.has(Inp.Reg))
return false;
const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
+ auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
unsigned B, W;
if (!HBS::getSubregMask(Inp, B, W, MRI))
return false;
for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
- if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
+ if (!BT.has(R) || Forbidden[R])
continue;
const BitTracker::RegisterCell &RC = BT.lookup(R);
unsigned RW = RC.width();
if (W == RW) {
- if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
+ if (FRC != MRI.getRegClass(R))
+ continue;
+ if (!HBS::isTransparentCopy(R, Inp, MRI))
continue;
if (!HBS::isEqual(InpRC, B, RC, 0, W))
continue;
@@ -1524,7 +1542,8 @@ bool CopyGeneration::findMatch(const Bit
else
continue;
Out.Reg = R;
- return true;
+ if (HBS::isTransparentCopy(Out, Inp, MRI))
+ return true;
}
return false;
}
@@ -1532,6 +1551,8 @@ bool CopyGeneration::findMatch(const Bit
bool CopyGeneration::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
+ if (!BT.reached(&B))
+ return false;
RegisterSet AVB(AVs);
bool Changed = false;
RegisterSet Defs;
@@ -1543,20 +1564,44 @@ bool CopyGeneration::processBlock(Machin
HBS::getInstrDefs(*I, Defs);
unsigned Opc = I->getOpcode();
- if (CopyPropagation::isCopyReg(Opc))
+ if (CopyPropagation::isCopyReg(Opc) || ConstGeneration::isTfrConst(*I))
continue;
+ DebugLoc DL = I->getDebugLoc();
+ auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+
for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
BitTracker::RegisterRef MR;
- if (!findMatch(R, MR, AVB))
+ auto *FRC = HBS::getFinalVRegClass(R, MRI);
+
+ if (findMatch(R, MR, AVB)) {
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(MR.Reg, 0, MR.Sub);
+ BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+ HBS::replaceReg(R, NewR, MRI);
+ Forbidden.insert(R);
continue;
- DebugLoc DL = I->getDebugLoc();
- auto *FRC = HBS::getFinalVRegClass(MR, MRI);
- unsigned NewR = MRI.createVirtualRegister(FRC);
- auto At = I->isPHI() ? B.getFirstNonPHI() : I;
- BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
- .addReg(MR.Reg, 0, MR.Sub);
- BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+ }
+
+ if (FRC == &Hexagon::DoubleRegsRegClass) {
+ // Try to generate REG_SEQUENCE.
+ BitTracker::RegisterRef TL = { R, Hexagon::subreg_loreg };
+ BitTracker::RegisterRef TH = { R, Hexagon::subreg_hireg };
+ BitTracker::RegisterRef ML, MH;
+ if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
+ auto *FRC = HBS::getFinalVRegClass(R, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
+ .addReg(ML.Reg, 0, ML.Sub)
+ .addImm(Hexagon::subreg_loreg)
+ .addReg(MH.Reg, 0, MH.Sub)
+ .addImm(Hexagon::subreg_hireg);
+ BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
+ HBS::replaceReg(R, NewR, MRI);
+ Forbidden.insert(R);
+ }
+ }
}
}
@@ -2121,6 +2166,8 @@ bool BitSimplification::simplifyTstbit(M
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
+ if (!BT.reached(&B))
+ return false;
bool Changed = false;
RegisterSet AVB = AVs;
RegisterSet Defs;
@@ -2203,7 +2250,11 @@ bool HexagonBitSimplify::runOnMachineFun
RegisterSet ARE; // Available registers for RIE.
RedundantInstrElimination RIE(BT, HII, MRI);
- Changed |= visitBlock(Entry, RIE, ARE);
+ bool Ried = visitBlock(Entry, RIE, ARE);
+ if (Ried) {
+ Changed = true;
+ BT.run();
+ }
RegisterSet ACG; // Available registers for CG.
CopyGeneration CopyG(BT, HII, MRI);
Modified: llvm/trunk/lib/Target/Hexagon/HexagonBitTracker.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonBitTracker.cpp?rev=276787&r1=276786&r2=276787&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonBitTracker.cpp (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonBitTracker.cpp Tue Jul 26 13:30:11 2016
@@ -138,8 +138,21 @@ bool HexagonEvaluator::evaluate(const Ma
if (NumDefs == 0)
return false;
- if (MI.mayLoad())
- return evaluateLoad(MI, Inputs, Outputs);
+ using namespace Hexagon;
+ unsigned Opc = MI.getOpcode();
+
+ if (MI.mayLoad()) {
+ switch (Opc) {
+ // These instructions may be marked as mayLoad, but they are generating
+ // immediate values, so skip them.
+ case CONST32:
+ case CONST32_Int_Real:
+ case CONST64_Int_Real:
+ break;
+ default:
+ return evaluateLoad(MI, Inputs, Outputs);
+ }
+ }
// Check COPY instructions that copy formal parameters into virtual
// registers. Such parameters can be sign- or zero-extended at the
@@ -174,8 +187,6 @@ bool HexagonEvaluator::evaluate(const Ma
}
RegisterRefs Reg(MI);
- unsigned Opc = MI.getOpcode();
- using namespace Hexagon;
#define op(i) MI.getOperand(i)
#define rc(i) RegisterCell::ref(getCell(Reg[i], Inputs))
#define im(i) MI.getOperand(i).getImm()
@@ -246,9 +257,7 @@ bool HexagonEvaluator::evaluate(const Ma
case A2_tfrsi:
case A2_tfrpi:
case CONST32:
- case CONST32_Float_Real:
case CONST32_Int_Real:
- case CONST64_Float_Real:
case CONST64_Int_Real:
return rr0(eIMM(im(1), W0), Outputs);
case TFR_PdFalse:
Added: llvm/trunk/test/CodeGen/Hexagon/bit-gen-rseq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/bit-gen-rseq.ll?rev=276787&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/bit-gen-rseq.ll (added)
+++ llvm/trunk/test/CodeGen/Hexagon/bit-gen-rseq.ll Tue Jul 26 13:30:11 2016
@@ -0,0 +1,43 @@
+; RUN: llc -march=hexagon -disable-hsdr < %s | FileCheck %s
+; Check that we don't generate any bitwise operations.
+
+; CHECK-NOT: = or(
+; CHECK-NOT: = and(
+
+target triple = "hexagon"
+
+define i32 @fred(i32* nocapture readonly %p, i32 %n) #0 {
+entry:
+ %t.sroa.0.048 = load i32, i32* %p, align 4
+ %cmp49 = icmp ugt i32 %n, 1
+ br i1 %cmp49, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %t.sroa.0.052 = phi i32 [ %t.sroa.0.0, %for.body ], [ %t.sroa.0.048, %entry ]
+ %t.sroa.11.051 = phi i64 [ %t.sroa.11.0.extract.shift, %for.body ], [ 0, %entry ]
+ %i.050 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
+ %t.sroa.0.0.insert.ext = zext i32 %t.sroa.0.052 to i64
+ %t.sroa.0.0.insert.insert = or i64 %t.sroa.0.0.insert.ext, %t.sroa.11.051
+ %0 = tail call i64 @llvm.hexagon.A2.addp(i64 %t.sroa.0.0.insert.insert, i64 %t.sroa.0.0.insert.insert)
+ %t.sroa.11.0.extract.shift = and i64 %0, -4294967296
+ %arrayidx4 = getelementptr inbounds i32, i32* %p, i32 %i.050
+ %inc = add nuw i32 %i.050, 1
+ %t.sroa.0.0 = load i32, i32* %arrayidx4, align 4
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %t.sroa.0.0.lcssa = phi i32 [ %t.sroa.0.048, %entry ], [ %t.sroa.0.0, %for.body ]
+ %t.sroa.11.0.lcssa = phi i64 [ 0, %entry ], [ %t.sroa.11.0.extract.shift, %for.body ]
+ %t.sroa.0.0.insert.ext17 = zext i32 %t.sroa.0.0.lcssa to i64
+ %t.sroa.0.0.insert.insert19 = or i64 %t.sroa.0.0.insert.ext17, %t.sroa.11.0.lcssa
+ %1 = tail call i64 @llvm.hexagon.A2.addp(i64 %t.sroa.0.0.insert.insert19, i64 %t.sroa.0.0.insert.insert19)
+ %t.sroa.11.0.extract.shift41 = lshr i64 %1, 32
+ %t.sroa.11.0.extract.trunc42 = trunc i64 %t.sroa.11.0.extract.shift41 to i32
+ ret i32 %t.sroa.11.0.extract.trunc42
+}
+
+declare i64 @llvm.hexagon.A2.addp(i64, i64) #1
+
+attributes #0 = { norecurse nounwind readonly }
+attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/Hexagon/struct_args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/struct_args.ll?rev=276787&r1=276786&r2=276787&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Hexagon/struct_args.ll (original)
+++ llvm/trunk/test/CodeGen/Hexagon/struct_args.ll Tue Jul 26 13:30:11 2016
@@ -1,6 +1,6 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hsdr < %s | FileCheck %s
-; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
-; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
+; RUN: llc -march=hexagon -disable-hsdr < %s | FileCheck %s
+; CHECK-DAG: r0 = memw
+; CHECK-DAG: r1 = memw
%struct.small = type { i32, i32 }
@@ -8,7 +8,7 @@
define void @foo() nounwind {
entry:
- %0 = load i64, i64* bitcast (%struct.small* @s1 to i64*), align 1
+ %0 = load i64, i64* bitcast (%struct.small* @s1 to i64*), align 4
call void @bar(i64 %0)
ret void
}
More information about the llvm-commits mailing list