<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/116400>116400</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[ADT] Graph iterators (DepthFirstIterator, PostOrderIterator) with external Visited sets shouldn't set std::forward_iterator_tag
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Maetveis
</td>
</tr>
</table>
<pre>
When using the graph iterators with external sets and doing multiple passes over these iterators, then different passes will not visit the same elements. This is obvious and by design, but it contradicts the requirement of `std::forward_iterator` which is what's claimed by the iterators:
https://github.com/llvm/llvm-project/blob/a9d94834cd91fe93d8723ac4232fe7becdca61a7/llvm/include/llvm/ADT/PostOrderIterator.h#L98
https://github.com/llvm/llvm-project/blob/e6cc58922f5f36e1eecdaf2f44a5ef7501acc139/llvm/include/llvm/ADT/DepthFirstIterator.h#L87
<details>
<summary>Minimal reproducible example of violating the multi-pass guarantee</summary>
```c++
#include <llvm/ADT/PostOrderIterator.h>
#include <llvm/ADT/DepthFirstIterator.h>
#include <llvm/ADT/DirectedGraph.h>
#include <llvm/ADT/GraphTraits.h>
#include <llvm/ADT/SmallPtrSet.h>
#include <vector>
#include <iostream>
namespace llvm {
class DGTestNode;
class DGTestEdge;
using DGTestNodeBase = DGNode<DGTestNode, DGTestEdge>;
using DGTestEdgeBase = DGEdge<DGTestNode, DGTestEdge>;
using DGTestBase = DirectedGraph<DGTestNode, DGTestEdge>;
class DGTestNode : public DGTestNodeBase {
public:
DGTestNode() = default;
};
class DGTestEdge : public DGTestEdgeBase {
public:
DGTestEdge() = delete;
DGTestEdge(DGTestNode &N) : DGTestEdgeBase(N) {}
};
class DGTestGraph : public DGTestBase {
public:
DGTestGraph() = default;
~DGTestGraph(){};
};
using EdgeListTy = SmallVector<DGTestEdge *, 2>;
//===--------------------------------------------------------------------===//
// GraphTraits specializations for the DGTest
//===--------------------------------------------------------------------===//
template <> struct GraphTraits<DGTestNode *> {
using NodeRef = DGTestNode *;
static DGTestNode *DGTestGetTargetNode(DGEdge<DGTestNode, DGTestEdge> *P) {
return &P->getTargetNode();
}
// Provide a mapped iterator so that the GraphTrait-based implementations can
// find the target nodes without having to explicitly go through the edges.
using ChildIteratorType =
mapped_iterator<DGTestNode::iterator, decltype(&DGTestGetTargetNode)>;
using ChildEdgeIteratorType = DGTestNode::iterator;
static NodeRef getEntryNode(NodeRef N) { return N; }
static ChildIteratorType child_begin(NodeRef N) {
return ChildIteratorType(N->begin(), &DGTestGetTargetNode);
}
static ChildIteratorType child_end(NodeRef N) {
return ChildIteratorType(N->end(), &DGTestGetTargetNode);
}
static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
return N->begin();
}
static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
};
template <>
struct GraphTraits<DGTestGraph *> : public GraphTraits<DGTestNode *> {
using nodes_iterator = DGTestGraph::iterator;
static NodeRef getEntryNode(DGTestGraph *DG) { return *DG->begin(); }
static nodes_iterator nodes_begin(DGTestGraph *DG) { return DG->begin(); }
static nodes_iterator nodes_end(DGTestGraph *DG) { return DG->end(); }
};
}
using namespace llvm;
template <typename SupposedlyForwardIt>
void testMultiPass(SupposedlyForwardIt B, SupposedlyForwardIt E,
const char *Label) {
static_assert(std::is_same_v<typename SupposedlyForwardIt::iterator_category,
std::forward_iterator_tag>);
std::vector<typename SupposedlyForwardIt::value_type> Values1, Values2;
for (auto It = B; It != E; ++It)
Values1.push_back(*It);
for (auto It = B; It != E; ++It)
Values2.push_back(*It);
std::cout << Label << ":" << (Values1 == Values2 ? " OK" : " FAIL") << "\n";
}
int main() {
DGTestGraph DG;
DGTestNode N1, N2, N3, N4;
DGTestEdge E2(N2), E3(N3), E4(N4);
DG.addNode(N1);
DG.addNode(N2);
DG.addNode(N3);
DG.addNode(N4);
DG.connect(N1, N2, E2);
DG.connect(N2, N3, E3);
DG.connect(N2, N4, E4);
// The graph looks like this now:
// N1 -> N2 -> N3
// |
// L----> N4
testMultiPass(df_begin(&DG), df_end(&DG), "df_iterator");
{
df_iterator_default_set<DGTestNode*, 3> Visited;
testMultiPass(df_ext_begin(&DG, Visited), df_ext_end(&DG, Visited),
"df_ext_iterator");
}
testMultiPass(po_begin(&DG), po_end(&DG), "po_iterator");
{
std::set<DGTestNode*> Visited;
testMultiPass(po_ext_begin(&DG, Visited), po_ext_end(&DG, Visited),
"po_ext_iterator");
}
}
```
Compile & Run it with `clang++ -Wl,-rpath,build/lib main.cpp -I llvm/include -I build/include -L build/lib -lLLVMSupport && ./a.out`
</details>
Claiming to be a forward iterator has practical problems: for example vector.insert in libstdc++ dispatches based in the iterator category and for forward iterators first does a `std::distance(begin, end)` to know how many elements to allocate. This already fills in the visited set when it is external therefore not all elements will get pushed into the vector during the second pass.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy8WVtv66ry_zT0ZdQowbk-9KHNpar-Xd3VXtXajxGGccx_EeMDOF05D-ezH4Ht2E7cNkt760RRYsMwM_zmwgDMWrnLEO_I5IFMVjescKk2d98YugNKexNrcbz7K8UMCiuzHbgUYWdYnoJ0aJjTxsK7dCngL4cmYwosOgssEyC0H7AvlJO5QsiZtWhBH9B4LhYbDoQufVMGQiYJGsxcTf0ulYJMOzhIK12QbtkeARXuMXN2AG-ptCAt6PggdVFKjo8g0M_LM44LB9IB15kzTEjubGBj8F-FNIEL6ATIdGidINE9ie4Tbd6ZEdtaPzIdwnsqeerlvKfMETqzwBWTewzCPL9mMtE9Ga7IsPpNnctDG90QutlJlxbxgOs9oRulDvXfbW70_yN3hG5ipWNCN2whFuN5NOZiMUpwEYn5jEaMj2lEE5zFyAVn0xGbNXxkxlUhsGm4X70RunnV1v1hBJqnSsNBSmj0vJj_E1rilPPJfEFpMkmiKY4QuWAJTcZjNsFkNhmOGOejaPGllivMXbqRxrqumvNZW00SLQU6JpUl0fqswxb7PTNHEq2_yUzumQKDudGi4DJWCPiL7b0b6gQOUivmam8ODnrr_Q12BTMsc4gkWhK6aTh2RE2H5ZcT-uC_ZSuNqpkBiZZfGuDE8uNhvYhcM04a5A7Fo4_S64YE0jfDpLPXDfi-Z0q9OvMdXXtAz7ADch9B_Tylts4g259xyNgebc44ghcKZPbQ7uXKm2r1-IbWvWiBJHq47FiLXdNRZq5mxAOzXvwKVo8lg2WLG112WKx7ufi-FpeS9Le5NBzaJruSUT8YQKJ7yItYSX4x4RrGsvuUptpQ0jmhi6CSwIQVyjXiZqtPRHvlLkU3KH0kuj23tmyFrjFfl6g9Vzp9Kcfcn0kkdF72zB684lfMIEB_OYWr1C_N9jF2AP-5IKw0-xje0lH8hJ6ldW_HwDnE3Y8qpJZt7Om9dxV66SFlRifRqvze_gOfE7OKd0sOtFIJ2By5ZEr-mzmpMwuJDit_hdr_WL3w63CfK-ZC8iHRGqwzBXdtpTvRF2CN1o0DQFUF-c4_ManCv0veQR_AOuY6weipKndA98bMDuvYuyaR-NGvtWtXIgAMusJkPiBeb0m03p0x9v7WOGMTEXVDabpXow9SIDDYszxHcSppwGpwKSvLrwar25hZT-WXVV9GVVbmLDtjnMhMhLEuKAWZFlhWjbpwkLJDWIo14K9cSS6dOsLOSzS62KVhIIod2kHXBstUKlEvjW_HPKTSiqScQFPAdSANNd6piy5BIFfumJdITXtts2gHVlsBb5ZzJeBDYR84R-1OO3TrzJljZba6uU5ltZlfSPTQWPHE5RIQ7lu2Me5k1sPuwn0uGPhB3p9qDh4HuoRPQLp0si_Vw0z8LeXK8derduH-bf0uzFnpKHb4FY61cc4B-xKTz2T2g9OR1cy_7RWXC8pZ7isbP0mA1YpYZcBmYfztZBnC_RSLrQCpyp3eCPkqNs50XD2eQRPaLk3RY4Mz9crXetRXYr6Q8ZmI0m7XCbjWxme-XeHfKac_8QqfAz0xfC_yXFsU6rgpt8JP7uQxBy0FOLTum983vTJrCZ33DIAHH499HWtCl0149364zqwDnjLjQXlmMapusJXAbpm1aByh89PeXdqtZXvcHr6cT9vttpw53GlzbFT78DRg69jOo9EK7cajqjH1rucKFQ5MFbgNy0-0hh_-zY48dOUjbeeP6t_XUYTOWeE0PLkQUA_eMfwzHfnXdfCTsD19cl7TE9yVgEFe2HQbM_4z-NV9SXU-n78viF4n6AQc9xVBSFBLCGavXwil4XiCNg3zai5QVnu1RCDRxpPDH_9Xkt-Ht83907NnEmr0E8vJMgucH3rjR2YO9qwO7XZma4ft6rFlo1Y-fAlmfKHhNwq_4wvKUL2vqU_ytFrD1pF_i-q3sX8b96C2ehwwIepaYdRda7qd9LPOqNPZ7euT60m4zrJwEDRvz3J9KadF2AJiHX1NOK5nf6lAVVe-nc4ildY_LSj5E8Gl0kKm31u7tIr8ZQQ-l8ILrf6jM4LwIbNlX_NzubNYe73aupxnQpFsm9VgGtJ5KDGTbZ3Dm0ZCqUiaIpXS83KhfmxRbavN5daiO9sphB1gFLKItNKhaHj1aIm_3Lmmy9PARulfrqv4GU1dU4WZeOJPZrP6QJlc90KW637Icn0NZKeM0odTH0QAfYpdgVJFdR1KFfHXKDUP9XFj2-2Wep9LFU5A4M8iA-nKQ3gyHXLFsl2ZkeH2L0Xo8tbkzKWELuNCKkHoRsk4pLUBz3O4fYLuyaxvqSlPTc_QHnyrnp9_fAtLmvFLwdTrMSB0wwa6cGe6lmep_ce2S8Xkvtr9xX7nWS22zdYzZRZyw7iTnCnIjY4V7q1P6n5xqg91y_V2IDNfDoDMQMnYOlGd0IKQNmeOp2ih2rNmnTN7qJf_cHvgGZ_rYSGRxjoQGi2wzm2BkNaxjPt0WXnKEoIzLMh06Cf2M9PvkOp32LPseLq58D1MKe1FV7cYTBlk4giJVMrWOh5KRwKLDt5TDLaWtrlucSkaTLTBcE3ClGokhMsTv-H2a3CYtdMlzwAXiMLUh-AWuc5EuHUZ3Ii7SCyiBbvBu9EsGk3n42gxuknvZpHA8YSKCeV8SHmyGC6SxXw2j3C2mFM6vZF3dEjHo9FoMprQKR0P6CKmPB4KNsPhdEITMh7inkk18C430GZ3I60t8G40mo6HwxvlV3wbrqIozfAdQm9Yp1c35i5cQcTFzpLxUEnrbMPGSafCHdb96o1MVuUmpWU9X2lfnKd7S10czvuFvnuh9aOxgAWb6kKJjNCZCyb5tEy8KYy6--2rlTBpS-imQuVwR_8bAAD__xKyQu8">