summaryrefslogtreecommitdiff
path: root/usr.bin
diff options
context:
space:
mode:
authorMarc Espie <espie@cvs.openbsd.org>2001-03-26 22:53:34 +0000
committerMarc Espie <espie@cvs.openbsd.org>2001-03-26 22:53:34 +0000
commit60ea328912452bb904fc34be01f64367de8fe8b3 (patch)
tree3a4743094e1fef5ac26efb7c9c1b9af4d6a1c1a5 /usr.bin
parent0ed499a8c4ed0e7ef2f74d6e681f98b82cce5028 (diff)
Replacement for original tsort.
The old code suffers from a few defects: - it does not even implement the standard optimal topological sort algorithm. It's much slower. - its longest cycle computation is completely bogus. This is clean-slate code, that does implement the actual standard optimal topological sort, together with a correct graph traversal to find longest cycles. It does also feature a `stable tsort' mode, where it uses a heap to yield the least disturbed permutation of input nodes that satisfies the ordering constraints (in particular, try tsort -f). Thanks to the nature of the problem, the actual output won't exactly match the old one, but it does pass the regression suite (and it is a topological sorter). Ok millert@
Diffstat (limited to 'usr.bin')
-rw-r--r--usr.bin/tsort/tsort.148
-rw-r--r--usr.bin/tsort/tsort.c1215
2 files changed, 904 insertions, 359 deletions
diff --git a/usr.bin/tsort/tsort.1 b/usr.bin/tsort/tsort.1
index 48132883ff7..971db804316 100644
--- a/usr.bin/tsort/tsort.1
+++ b/usr.bin/tsort/tsort.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: tsort.1,v 1.6 2000/03/11 21:40:06 aaron Exp $
+.\" $OpenBSD: tsort.1,v 1.7 2001/03/26 22:53:33 espie Exp $
.\" $NetBSD: tsort.1,v 1.6 1996/01/17 20:37:49 mycroft Exp $
.\"
.\" Copyright (c) 1990, 1993, 1994
@@ -37,7 +37,7 @@
.\"
.\" @(#)tsort.1 8.3 (Berkeley) 4/1/94
.\"
-.Dd April 1, 1994
+.Dd November 1, 1999
.Dt TSORT 1
.Os
.Sh NAME
@@ -45,11 +45,15 @@
.Nd topological sort of a directed graph
.Sh SYNOPSIS
.Nm tsort
+.Op Fl f
+.Op Fl h Ar file
.Op Fl l
.Op Fl q
+.Op Fl r
+.Op Fl w
.Op Ar file
.Sh DESCRIPTION
-.Nm
+.Nm tsort
takes a list of pairs of node names representing directed arcs in
a graph and prints the nodes in topological order on standard output.
Input is taken from the named
@@ -57,7 +61,7 @@ Input is taken from the named
or from standard input if no file
is given.
.Pp
-Node names in the input are separated by whitespace and there must
+Node names in the input are separated by white space and there must
be an even number of node pairs.
.Pp
Presence of a node in a graph can be represented by an arc from the node
@@ -70,23 +74,45 @@ Cycles are reported on standard error.
.Pp
The options are as follows:
.Bl -tag -width Ds
+.It Fl f
+Resolve ambiguities by selecting nodes based on the order of apparition
+of the first component of the pairs.
+.It Fl h Ar file
+Use
+.Ar file ,
+which holds an ordered list of nodes, to resolve ambiguities.
.It Fl l
Search for and display the longest cycle.
-Can take a very long time.
.It Fl q
-Do not display informational messages about cycles.
-This is primarily
+Do not display informational messages about cycles. This is primarily
intended for building libraries, where optimal ordering is not critical,
and cycles occur often.
+.It Fl r
+Reverse the ordering relation.
+.It Fl v
+Inform on the exact number of edges broken while breaking cycles.
+.It Fl w
+Exit with exit code the number of cycles
+.Nm
+had to break.
.El
.Sh SEE ALSO
-.Xr ar 1
+.Xr ar 1 ,
+.Xr lorder 1 ,
+.Rs
+.%A Donald E. Knuth
+.%B The Art of Computer Programming
+.%V Vol. 1
+.%P pp 258-268
+.%D 1973
+.Re
.Sh HISTORY
A
.Nm
command appeared in
.At v7 .
This
-.Nm
-command and manual page are derived from sources contributed to Berkeley by
-Michael Rendell of Memorial University of Newfoundland.
+.Nm tsort
+command was completely rewritten by Marc Espie for
+.Ox ,
+to finally use the well-known optimal algorithms for topological sorting.
diff --git a/usr.bin/tsort/tsort.c b/usr.bin/tsort/tsort.c
index 8fc1acf0379..c43ace0cf96 100644
--- a/usr.bin/tsort/tsort.c
+++ b/usr.bin/tsort/tsort.c
@@ -1,12 +1,9 @@
-/* $OpenBSD: tsort.c,v 1.4 1997/01/15 23:43:26 millert Exp $ */
-/* $NetBSD: tsort.c,v 1.11 1996/01/17 20:37:53 mycroft Exp $ */
+/* $OpenBSD: tsort.c,v 1.5 2001/03/26 22:53:33 espie Exp $ */
+/* ex:ts=8 sw=4:
+ */
/*
- * Copyright (c) 1989, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Michael Rendell of Memorial University of Newfoundland.
+ * Copyright (c) 1999-2001 Marc Espie.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -18,423 +15,945 @@
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
+ * This product includes software developed by Marc Espie for the OpenBSD
+ * Project.
*
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBSD
+ * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef lint
-static char copyright[] =
-"@(#) Copyright (c) 1989, 1993, 1994\n\
- The Regents of the University of California. All rights reserved.\n";
-#endif /* not lint */
-
-#ifndef lint
-#if 0
-static char sccsid[] = "@(#)tsort.c 8.3 (Berkeley) 5/4/95";
-#endif
-static char rcsid[] = "$OpenBSD: tsort.c,v 1.4 1997/01/15 23:43:26 millert Exp $";
-#endif /* not lint */
-
#include <sys/types.h>
-
+#include <assert.h>
#include <ctype.h>
-#include <db.h>
#include <err.h>
-#include <errno.h>
-#include <fcntl.h>
+#include <limits.h>
+#include <stddef.h>
+#include <ohash.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sysexits.h>
#include <unistd.h>
-/*
- * Topological sort. Input is a list of pairs of strings separated by
- * white space (spaces, tabs, and/or newlines); strings are written to
- * standard output in sorted order, one per line.
+/* The complexity of topological sorting is O(e), where e is the
+ * size of input. While reading input, vertices have to be identified,
+ * thus add the complexity of e keys retrieval among v keys using
+ * an appropriate data structure. This program uses open double hashing
+ * for that purpose. See Knuth for the expected complexity of double
+ * hashing (Brent variation should probably be used if v << e, as a user
+ * option).
+ *
+ * The algorithm used for longest cycle reporting is accurate, but somewhat
+ * expensive. It may need to build all free paths of the graph (a free
+ * path is a path that never goes twice through the same node), whose
+ * number can be as high as O(2^e). Usually, the number of free paths is
+ * much smaller though. This program's author does not believe that a
+ * significantly better worst-case complexity algorithm exists.
+ *
+ * In case of a hints file, the set of minimal nodes is maintained as a
+ * heap. The resulting complexity is O(e+v log v) for the worst case.
+ * The average should actually be near O(e).
+ *
+ * The simple topological sort algorithm detects cycles. This program
+ * goes further, breaking cycles through the use of simple heuristics.
+ * Each cycle break checks the whole set of nodes, hence if c cycles break
+ * are needed, this is an extra cost of O(c v).
*
- * usage:
- * tsort [-l] [inputfile]
- * If no input file is specified, standard input is read.
+ * Possible heuristics are as follows:
+ * - break cycle at node with lowest number of predecessors (default case),
+ * - break longest cycle at node with lowest number of predecessors,
+ * - break cycle at next node from the hints file.
*
- * Should be compatable with AT&T tsort HOWEVER the output is not identical
- * (i.e. for most graphs there is more than one sorted order, and this tsort
- * usually generates a different one then the AT&T tsort). Also, cycle
- * reporting seems to be more accurate in this version (the AT&T tsort
- * sometimes says a node is in a cycle when it isn't).
+ * Except for the hints file case, which sets an explicit constraint on
+ * which cycle to break, those heuristics locally result in the smallest
+ * number of broken edges.
*
- * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90
+ * Those are admittedly greedy strategies, as is the selection of the next
+ * node from the hints file amongst equivalent candidates that is used for
+ * `stable' topological sorting.
*/
-#define HASHSIZE 53 /* doesn't need to be big */
-#define NF_MARK 0x1 /* marker for cycle detection */
-#define NF_ACYCLIC 0x2 /* this node is cycle free */
-#define NF_NODEST 0x4 /* Unreachable */
-
-typedef struct node_str NODE;
-
-struct node_str {
- NODE **n_prevp; /* pointer to previous node's n_next */
- NODE *n_next; /* next node in graph */
- NODE **n_arcs; /* array of arcs to other nodes */
- int n_narcs; /* number of arcs in n_arcs[] */
- int n_arcsize; /* size of n_arcs[] array */
- int n_refcnt; /* # of arcs pointing to this node */
- int n_flags; /* NF_* */
- char n_name[1]; /* name of this node */
+
+#ifdef __GNUC__
+#define UNUSED __attribute__((unused))
+#else
+#define UNUSED
+#endif
+
+struct node;
+
+/* The set of arcs from a given node is stored as a linked list. */
+struct link {
+ struct link *next;
+ struct node *node;
};
-typedef struct _buf {
- char *b_buf;
- int b_bsize;
-} BUF;
+struct node {
+ unsigned int refs; /* Number of arcs left, coming into this node .
+ * Note that nodes with a null count can't
+ * be part of cycles. */
+ struct link *arcs; /* List of forward arcs. */
-DB *db;
-NODE *graph, **cycle_buf, **longest_cycle;
-int debug, longest, quiet;
+ unsigned int order; /* Order of nodes according to a hint file. */
-void add_arc __P((char *, char *));
-int find_cycle __P((NODE *, NODE *, int, int));
-NODE *get_node __P((char *));
-void *grow_buf __P((void *, int));
-void remove_node __P((NODE *));
-void tsort __P((void));
-void usage __P((void));
+ /* Cycle detection algorithms build a free path of nodes. */
+ struct node *from; /* Previous node in the current path. */
-int
-main(argc, argv)
- int argc;
- char *argv[];
+ unsigned int mark; /* Mark processed nodes in cycle discovery. */
+ struct link *traverse; /* Next link to traverse when backtracking. */
+ char k[1]; /* Name of this node. */
+};
+
+#define HASH_START 9
+
+struct array {
+ unsigned int entries;
+ struct node **t;
+};
+
+static void nodes_init __P((struct ohash *));
+static struct node *node_lookup __P((struct ohash *, const char *, const char *));
+static void usage __P((void));
+static struct node *new_node __P((const char *, const char *));
+
+static void read_pairs __P((FILE *, struct ohash *, int, const char *));
+static void split_nodes __P((struct ohash *, struct array *, struct array *));
+static void insert_arc __P((struct node *, struct node *));
+
+#ifdef DEBUG
+static void dump_node __P((struct node *));
+static void dump_array __P((struct array *));
+static void dump_hash __P((struct ohash *));
+#endif
+static void read_hints __P((FILE *, struct ohash *, const char *));
+static struct node *find_smallest_node __P((struct array *));
+static struct node *find_good_cycle_break __P((struct array *));
+static void print_cycle __P((struct array *));
+static int find_cycle_with __P((struct node *, struct array *));
+static struct node *find_predecessor __P((struct array *, struct node *));
+static unsigned int traverse_node __P((struct node *, unsigned int, struct array *));
+static struct node *find_longest_cycle __P((struct array *, struct array *));
+
+static void heap_down __P((struct array *, unsigned int));
+static void heapify __P((struct array *));
+static struct node *dequeue __P((struct array *));
+static void enqueue __P((struct array *, struct node *));
+
+
+
+#define erealloc(n, s) emem(realloc(n, s))
+static void *hash_alloc __P((size_t, void *));
+static void hash_free __P((void *, size_t, void *));
+static void* entry_alloc __P((size_t, void *));
+static void *emalloc __P((size_t));
+static void *emem __P((void *));
+#define DEBUG_TRAVERSE 0
+static struct ohash_info node_info = {
+ offsetof(struct node, k), NULL, hash_alloc, hash_free, entry_alloc };
+
+
+int main __P((int, char *[]));
+
+
+/***
+ *** Memory handling.
+ ***/
+
+static void *
+emem(p)
+ void *p;
{
- register BUF *b;
- register int c, n;
- FILE *fp;
- int bsize, ch, nused;
- BUF bufs[2];
-
- while ((ch = getopt(argc, argv, "dlq")) != -1)
- switch (ch) {
- case 'd':
- debug = 1;
- break;
- case 'l':
- longest = 1;
- break;
- case 'q':
- quiet = 1;
- break;
- case '?':
- default:
- usage();
- }
- argc -= optind;
- argv += optind;
+ if (p)
+ return p;
+ else
+ errx(EX_SOFTWARE, "Memory exhausted");
+}
- switch (argc) {
- case 0:
- fp = stdin;
- break;
- case 1:
- if ((fp = fopen(*argv, "r")) == NULL)
- err(1, "%s", *argv);
- break;
- default:
- usage();
- }
+static void *
+hash_alloc(s, u)
+ size_t s;
+ void *u UNUSED;
+{
+ return emem(calloc(s, 1));
+}
- for (b = bufs, n = 2; --n >= 0; b++)
- b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
+static void
+hash_free(p, s, u)
+ void *p;
+ size_t s UNUSED;
+ void *u UNUSED;
+{
+ free(p);
+}
- /* parse input and build the graph */
- for (n = 0, c = getc(fp);;) {
- while (c != EOF && isspace(c))
- c = getc(fp);
- if (c == EOF)
- break;
+static void *
+entry_alloc(s, u)
+ size_t s;
+ void *u UNUSED;
+{
+ return emalloc(s);
+}
- nused = 0;
- b = &bufs[n];
- bsize = b->b_bsize;
- do {
- b->b_buf[nused++] = c;
- if (nused == bsize)
- b->b_buf = grow_buf(b->b_buf, bsize *= 2);
- c = getc(fp);
- } while (c != EOF && !isspace(c));
-
- b->b_buf[nused] = '\0';
- b->b_bsize = bsize;
- if (n)
- add_arc(bufs[0].b_buf, bufs[1].b_buf);
- n = !n;
- }
- (void)fclose(fp);
- if (n)
- errx(1, "odd data count");
+static void *
+emalloc(s)
+ size_t s;
+{
+ return emem(malloc(s));
+}
- /* do the sort */
- tsort();
- exit(0);
+
+/***
+ *** Hash table.
+ ***/
+
+/* Inserting and finding nodes in the hash structure.
+ * We handle interval strings for efficiency wrt fgetln. */
+static struct node *
+new_node(start, end)
+ const char *start;
+ const char *end;
+{
+ struct node *n;
+
+ n = ohash_create_entry(&node_info, start, &end);
+ n->from = NULL;
+ n->arcs = NULL;
+ n->refs = 0;
+ n->mark = 0;
+ n->order = 0;
+ n->traverse = NULL;
+ return n;
}
-/* double the size of oldbuf and return a pointer to the new buffer. */
-void *
-grow_buf(bp, size)
- void *bp;
- int size;
+
+static void
+nodes_init(h)
+ struct ohash *h;
{
- if ((bp = realloc(bp, (u_int)size)) == NULL)
- err(1, NULL);
- return (bp);
+ ohash_init(h, HASH_START, &node_info);
}
-/*
- * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in
- * the graph, then add them.
- */
-void
-add_arc(s1, s2)
- char *s1, *s2;
+static struct node *
+node_lookup(h, start, end)
+ struct ohash *h;
+ const char *start;
+ const char *end;
{
- register NODE *n1;
- NODE *n2;
- int bsize, i;
+ unsigned int i;
+ struct node * n;
- n1 = get_node(s1);
+ i = ohash_qlookupi(h, start, &end);
- if (!strcmp(s1, s2))
+ n = ohash_find(h, i);
+ if (n == NULL)
+ n = ohash_insert(h, i, new_node(start, end));
+ return n;
+}
+
+#ifdef DEBUG
+static void
+dump_node(n)
+ struct node *n;
+{
+ struct link *l;
+
+ if (n->refs == 0)
return;
+ printf("%s (%u): ", n->k, n->refs);
+ for (l = n->arcs; l != NULL; l = l->next)
+ if (n->refs != 0)
+ printf("%s(%u) ", l->node->k, l->node->refs);
+ putchar('\n');
+}
+
+static void
+dump_array(a)
+ struct array *a;
+{
+ unsigned int i;
- n2 = get_node(s2);
+ for (i = 0; i < a->entries; i++)
+ dump_node(a->t[i]);
+}
+
+static void
+dump_hash(h)
+ struct hash *h;
+{
+ unsigned int i;
+ struct node *n;
+
+ for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i))
+ dump_node(n);
+}
+#endif
+
+
+/***
+ *** Reading data.
+ ***/
+
+static void
+insert_arc(a, b)
+ struct node *a, *b;
+{
+ struct link *l;
- /*
- * Check if this arc is already here.
- */
- for (i = 0; i < n1->n_narcs; i++)
- if (n1->n_arcs[i] == n2)
+ /* Check that this arc is not already present. */
+ for (l = a->arcs; l != NULL; l = l->next) {
+ if (l->node == b)
return;
- /*
- * Add it.
- */
- if (n1->n_narcs == n1->n_arcsize) {
- if (!n1->n_arcsize)
- n1->n_arcsize = 10;
- bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
- n1->n_arcs = grow_buf(n1->n_arcs, bsize);
- n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
}
- n1->n_arcs[n1->n_narcs++] = n2;
- ++n2->n_refcnt;
+ b->refs++;
+ l = emalloc(sizeof(struct link));
+ l->node = b;
+ l->next = a->arcs;
+ a->arcs = l;
}
-/* Find a node in the graph (insert if not found) and return a pointer to it. */
-NODE *
-get_node(name)
- char *name;
+static void
+read_pairs(f, h, reverse, name)
+ FILE *f;
+ struct ohash *h;
+ int reverse;
+ const char *name;
{
- DBT data, key;
- NODE *n;
+ int toggle;
+ struct node *a;
+ size_t size;
+ char *str;
+ unsigned int o;
+
+ o = 1;
+ toggle = 1;
+ a = NULL;
+
+ while ((str = fgetln(f, &size)) != NULL) {
+ char *sentinel;
+
+ sentinel = str + size;
+ for (;;) {
+ char *e;
+
+ while (isspace(*str) && str < sentinel)
+ str++;
+ if (str == sentinel)
+ break;
+ for (e = str; !isspace(*e) && e < sentinel; e++)
+ continue;
+ if (toggle) {
+ a = node_lookup(h, str, e);
+ if (a->order == 0)
+ a->order = o++;
+ } else {
+ struct node *b;
+
+ b = node_lookup(h, str, e);
+ assert(a != NULL);
+ if (b != a) {
+ if (reverse)
+ insert_arc(b, a);
+ else
+ insert_arc(a, b);
+ }
+ }
+ toggle = !toggle;
+ str = e;
+ }
+ }
+ if (toggle == 0)
+ errx(EX_DATAERR, "odd number of pairs in %s", name);
+ if (!feof(f))
+ err(EX_IOERR, "error reading %s", name);
+}
- if (db == NULL &&
- (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)
- err(1, "db: %s", name);
+static void
+read_hints(f, h, name)
+ FILE *f;
+ struct ohash *h;
+ const char *name;
+{
+ char *str;
+ size_t size;
+ unsigned int i;
- key.data = name;
- key.size = strlen(name) + 1;
+ i = 1;
- switch ((*db->get)(db, &key, &data, 0)) {
- case 0:
- bcopy(data.data, &n, sizeof(n));
- return (n);
- case 1:
- break;
- default:
- case -1:
- err(1, "db: %s", name);
- }
+ while ((str = fgetln(f, &size)) != NULL) {
+ char *sentinel;
+
+ sentinel = str + size;
+ for (;;) {
+ char *e;
+ struct node *a;
- if ((n = malloc(sizeof(NODE) + key.size)) == NULL)
- err(1, NULL);
-
- n->n_narcs = 0;
- n->n_arcsize = 0;
- n->n_arcs = NULL;
- n->n_refcnt = 0;
- n->n_flags = 0;
- bcopy(name, n->n_name, key.size);
-
- /* Add to linked list. */
- if ((n->n_next = graph) != NULL)
- graph->n_prevp = &n->n_next;
- n->n_prevp = &graph;
- graph = n;
-
- /* Add to hash table. */
- data.data = &n;
- data.size = sizeof(n);
- if ((*db->put)(db, &key, &data, 0))
- err(1, "db: %s", name);
- return (n);
+ while (isspace(*str) && str < sentinel)
+ str++;
+ if (str == sentinel)
+ break;
+ for (e = str; !isspace(*e) && e < sentinel; e++)
+ continue;
+ a = node_lookup(h, str, e);
+ if (a->order != 0)
+ errx(EX_DATAERR,
+ "duplicate node %s in hints file %s",
+ a->k, name);
+ else
+ a->order = i++;
+ str = e;
+ }
+ }
}
+
+/***
+ *** Standard heap handling routines.
+ ***/
-/*
- * Clear the NODEST flag from all nodes.
- */
-void
-clear_cycle()
+static void
+heap_down(h, i)
+ struct array *h;
+ unsigned int i;
{
- NODE *n;
+ unsigned int j;
+ struct node *swap;
- for (n = graph; n != NULL; n = n->n_next)
- n->n_flags &= ~NF_NODEST;
+ for (; (j=2*i+1) < h->entries; i = j) {
+ if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order)
+ j++;
+ if (h->t[i]->order <= h->t[j]->order)
+ break;
+ swap = h->t[i];
+ h->t[i] = h->t[j];
+ h->t[j] = swap;
+ }
}
-/* do topological sort on graph */
-void
-tsort()
+static void
+heapify(h)
+ struct array *h;
{
- register NODE *n, *next;
- register int cnt, i;
-
- while (graph != NULL) {
- /*
- * Keep getting rid of simple cases until there are none left,
- * if there are any nodes still in the graph, then there is
- * a cycle in it.
- */
- do {
- for (cnt = 0, n = graph; n != NULL; n = next) {
- next = n->n_next;
- if (n->n_refcnt == 0) {
- remove_node(n);
- ++cnt;
- }
- }
- } while (graph != NULL && cnt);
+ unsigned int i;
- if (graph == NULL)
- break;
+ for (i = h->entries; i != 0;)
+ heap_down(h, --i);
+}
- if (!cycle_buf) {
- /*
- * Allocate space for two cycle logs - one to be used
- * as scratch space, the other to save the longest
- * cycle.
- */
- for (cnt = 0, n = graph; n != NULL; n = n->n_next)
- ++cnt;
- cycle_buf = malloc((u_int)sizeof(NODE *) * cnt);
- longest_cycle = malloc((u_int)sizeof(NODE *) * cnt);
- if (cycle_buf == NULL || longest_cycle == NULL)
- err(1, NULL);
+#define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] )
+
+static struct node *
+dequeue(h)
+ struct array *h;
+{
+ struct node *n;
+
+ if (h->entries == 0)
+ n = NULL;
+ else {
+ n = h->t[0];
+ if (--h->entries != 0) {
+ h->t[0] = h->t[h->entries];
+ heap_down(h, 0);
}
- for (n = graph; n != NULL; n = n->n_next)
- if (!(n->n_flags & NF_ACYCLIC))
- if (cnt = find_cycle(n, n, 0, 0)) {
- if (!quiet) {
- warnx("cycle in data");
- for (i = 0; i < cnt; i++)
- warnx("%s",
- longest_cycle[i]->n_name);
- }
- remove_node(n);
- clear_cycle();
- break;
- } else {
- /* to avoid further checks */
- n->n_flags |= NF_ACYCLIC;
- clear_cycle();
- }
+ }
+ return n;
+}
+
+#define ENQUEUE(h, n) do { \
+ if (hints_flag) \
+ enqueue((h), (n)); \
+ else \
+ (h)->t[(h)->entries++] = (n); \
+ } while(0);
+
+static void
+enqueue(h, n)
+ struct array *h;
+ struct node *n;
+{
+ unsigned int i, j;
+ struct node *swap;
- if (n == NULL)
- errx(1, "internal error -- could not find cycle");
+ h->t[h->entries++] = n;
+ for (i = h->entries-1; i > 0; i = j) {
+ j = (i-1)/2;
+ if (h->t[j]->order < h->t[i]->order)
+ break;
+ swap = h->t[j];
+ h->t[j] = h->t[i];
+ h->t[i] = swap;
}
}
-/* print node and remove from graph (does not actually free node) */
-void
-remove_node(n)
- register NODE *n;
+
+/***
+ *** Search through hash array for nodes.
+ ***/
+
+/* Split nodes into unrefed nodes/live nodes. */
+static void
+split_nodes(hash, heap, remaining)
+ struct ohash *hash;
+ struct array *heap;
+ struct array *remaining;
{
- register NODE **np;
- register int i;
-
- (void)printf("%s\n", n->n_name);
- for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++)
- --(*np)->n_refcnt;
- n->n_narcs = 0;
- *n->n_prevp = n->n_next;
- if (n->n_next)
- n->n_next->n_prevp = n->n_prevp;
+
+ struct node *n;
+ unsigned int i;
+
+ heap->t = emalloc(sizeof(struct node *) * ohash_entries(hash));
+ remaining->t = emalloc(sizeof(struct node *) * ohash_entries(hash));
+ heap->entries = 0;
+ remaining->entries = 0;
+
+ for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) {
+ if (n->refs == 0)
+ heap->t[heap->entries++] = n;
+ else
+ remaining->t[remaining->entries++] = n;
+ }
}
+/* Good point to break a cycle: live node with as few refs as possible. */
+static struct node *
+find_good_cycle_break(h)
+ struct array *h;
+{
+ unsigned int i;
+ unsigned int best;
+ struct node *u;
+
+ best = UINT_MAX;
+ u = NULL;
+
+ assert(h->entries != 0);
+ for (i = 0; i < h->entries; i++) {
+ struct node *n = h->t[i];
+ /* No need to look further. */
+ if (n->refs == 1)
+ return n;
+ if (n->refs != 0 && n->refs < best) {
+ best = n->refs;
+ u = n;
+ }
+ }
+ assert(u != NULL);
+ return u;
+}
+
+/* Retrieve the node with the smallest order. */
+static struct node *
+find_smallest_node(h)
+ struct array *h;
+{
+ unsigned int i;
+ unsigned int best;
+ struct node *u;
+
+ best = UINT_MAX;
+ u = NULL;
+
+ assert(h->entries != 0);
+ for (i = 0; i < h->entries; i++) {
+ struct node *n = h->t[i];
+ if (n->refs != 0 && n->order < best) {
+ best = n->order;
+ u = n;
+ }
+ }
+ assert(u != NULL);
+ return u;
+}
-/* look for the longest? cycle from node from to node to. */
-int
-find_cycle(from, to, longest_len, depth)
- NODE *from, *to;
- int depth, longest_len;
+
+/***
+ *** Graph algorithms.
+ ***/
+
+/* Explore the nodes reachable from i to find a cycle containing it, store
+ * it in c. This may fail. */
+static int
+find_cycle_with(i, c)
+ struct node *i;
+ struct array *c;
{
- register NODE **np;
- register int i, len;
-
- /*
- * avoid infinite loops and ignore portions of the graph known
- * to be acyclic
- */
- if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
- return (0);
- from->n_flags |= NF_MARK;
-
- for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
- cycle_buf[depth] = *np;
- if (*np == to) {
- if (depth + 1 > longest_len) {
- longest_len = depth + 1;
- (void)memcpy((char *)longest_cycle,
- (char *)cycle_buf,
- longest_len * sizeof(NODE *));
+ struct node *n;
+
+ n = i;
+ /* XXX Previous cycle findings may have left this pointer non-null. */
+ i->from = NULL;
+
+ for (;;) {
+ /* Note that all marks are reversed before this code exits. */
+ n->mark = 1;
+ if (n->traverse)
+ n->traverse = n->traverse->next;
+ else
+ n->traverse = n->arcs;
+ /* Skip over dead nodes. */
+ while (n->traverse && n->traverse->node->refs == 0)
+ n->traverse = n->traverse->next;
+ if (n->traverse) {
+ struct node *go = n->traverse->node;
+
+ if (go->mark) {
+ if (go == i) {
+ c->entries = 0;
+ for (; n != NULL; n = n->from)
+ c->t[c->entries++] = n;
+ return 1;
+ }
+ } else {
+ go->from = n;
+ n = go;
}
} else {
- if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
- continue;
- len = find_cycle(*np, to, longest_len, depth + 1);
+ n->mark = 0;
+ n = n->from;
+ if (n == NULL)
+ return 0;
+ }
+ }
+}
- if (debug)
- (void)printf("%*s %s->%s %d\n", depth, "",
- from->n_name, to->n_name, len);
+/* Find a live predecessor of node n. This is a slow routine, as it needs
+ * to go through the whole array, but it is not needed often.
+ */
+static struct node *
+find_predecessor(a, n)
+ struct array *a;
+ struct node *n;
+{
+ unsigned int i;
- if (len == 0)
- (*np)->n_flags |= NF_NODEST;
+ for (i = 0; i < a->entries; i++) {
+ struct node *m;
- if (len > longest_len)
- longest_len = len;
+ m = a->t[i];
+ if (m->refs != 0) {
+ struct link *l;
- if (len > 0 && !longest)
- break;
+ for (l = m->arcs; l != NULL; l = l->next)
+ if (l->node == n)
+ return m;
+ }
+ }
+ assert(1 == 0);
+ return NULL;
+}
+
+/* Traverse all strongly connected components reachable from node n.
+ Start numbering them at o. Return the maximum order reached.
+ Update the largest cycle found so far.
+ */
+static unsigned int
+traverse_node(n, o, c)
+ struct node *n;
+ unsigned int o;
+ struct array *c;
+{
+ unsigned int min, max;
+
+ n->from = NULL;
+ min = o;
+ max = ++o;
+
+ for (;;) {
+ n->mark = o;
+ if (DEBUG_TRAVERSE)
+ printf("%s(%d) ", n->k, n->mark);
+ /* Find next arc to explore. */
+ if (n->traverse)
+ n->traverse = n->traverse->next;
+ else
+ n->traverse = n->arcs;
+ /* Skip over dead nodes. */
+ while (n->traverse && n->traverse->node->refs == 0)
+ n->traverse = n->traverse->next;
+ /* If arc left. */
+ if (n->traverse) {
+ struct node *go;
+
+ go = n->traverse->node;
+ /* Optimisation: if go->mark < min, we already
+ * visited this strongly-connected component in
+ * a previous pass. Hence, this can yield no new
+ * cycle. */
+
+ /* Not part of the current path: go for it. */
+ if (go->mark == 0 || go->mark == min) {
+ go->from = n;
+ n = go;
+ o++;
+ if (o > max)
+ max = o;
+ /* Part of the current path: check cycle length. */
+ } else if (go->mark > min) {
+ if (DEBUG_TRAVERSE)
+ printf("%d\n", o - go->mark + 1);
+ if (o - go->mark + 1 > c->entries) {
+ struct node *t;
+ unsigned int i;
+
+ c->entries = o - go->mark + 1;
+ i = 0;
+ c->t[i++] = go;
+ for (t = n; t != go; t = t->from)
+ c->t[i++] = t;
+ }
+ }
+
+ /* No arc left: backtrack. */
+ } else {
+ n->mark = min;
+ n = n->from;
+ if (!n)
+ return max;
+ o--;
+ }
+ }
+}
+
+static void
+print_cycle(c)
+ struct array *c;
+{
+ unsigned int i;
+
+ /* Printing in reverse order, since cycle discoveries finds reverse
+ * edges. */
+ for (i = c->entries; i != 0;) {
+ i--;
+ warnx("%s", c->t[i]->k);
+ }
+}
+
+static struct node *
+find_longest_cycle(h, c)
+ struct array *h;
+ struct array *c;
+{
+ unsigned int i;
+ unsigned int o;
+ unsigned int best;
+ struct node *n;
+ static int notfirst = 0;
+
+ assert(h->entries != 0);
+
+ /* No cycle found yet. */
+ c->entries = 0;
+
+ /* Reset the set of marks, except the first time around. */
+ if (notfirst) {
+ for (i = 0; i < h->entries; i++)
+ h->t[i]->mark = 0;
+ } else
+ notfirst = 1;
+
+ o = 0;
+
+ /* Traverse the array. Each unmarked, live node heralds a
+ * new set of strongly connected components. */
+ for (i = 0; i < h->entries; i++) {
+ n = h->t[i];
+ if (n->refs != 0 && n->mark == 0) {
+ /* Each call to traverse_node uses a separate
+ * interval of numbers to mark nodes. */
+ o++;
+ o = traverse_node(n, o, c);
+ }
+ }
+
+ assert(c->entries != 0);
+ n = c->t[0];
+ best = n->refs;
+ for (i = 0; i < c->entries; i++) {
+ if (c->t[i]->refs < best) {
+ n = c->t[i];
+ best = n->refs;
}
}
- from->n_flags &= ~NF_MARK;
- return (longest_len);
+ return n;
}
-void
+
+#define plural(n) ((n) > 1 ? "s" : "")
+
+int
+main(argc, argv)
+ int argc;
+ char *argv[];
+{
+ struct ohash pairs;
+ int reverse_flag, quiet_flag, long_flag,
+ warn_flag, hints_flag, verbose_flag;
+
+ reverse_flag = quiet_flag = long_flag =
+ warn_flag = hints_flag = verbose_flag = 0;
+ nodes_init(&pairs);
+
+ {
+ int c;
+
+ while ((c = getopt(argc, argv, "h:flqrvw")) != -1) {
+ switch(c) {
+ case 'h': {
+ FILE *f;
+
+ f = fopen(optarg, "r");
+ if (f == NULL)
+ err(EX_NOINPUT, "Can't open hint file %s",
+ optarg);
+ read_hints(f, &pairs, optarg);
+ fclose(f);
+ }
+ /*FALLTHRU*/
+ case 'f':
+ if (hints_flag == 1)
+ usage();
+ hints_flag = 1;
+ break;
+ case 'l':
+ long_flag = 1;
+ break;
+ case 'q':
+ quiet_flag = 1;
+ break;
+ case 'r':
+ reverse_flag = 1;
+ break;
+ case 'v':
+ verbose_flag = 1;
+ break;
+ case 'w':
+ warn_flag = 1;
+ break;
+ default:
+ usage();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+ }
+
+ switch(argc) {
+ case 1: {
+ FILE *f;
+
+ f = fopen(argv[0], "r");
+ if (f == NULL)
+ err(EX_NOINPUT, "Can't open file %s", argv[1]);
+ read_pairs(f, &pairs, reverse_flag, argv[1]);
+ fclose(f);
+ break;
+ }
+ case 0:
+ read_pairs(stdin, &pairs, reverse_flag, "stdin");
+ break;
+ default:
+ usage();
+ }
+
+ {
+ struct array aux; /* Unrefed nodes/cycle reporting. */
+ struct array remaining;
+ unsigned int broken_arcs, broken_cycles;
+ unsigned int left;
+
+ broken_arcs = 0;
+ broken_cycles = 0;
+
+ split_nodes(&pairs, &aux, &remaining);
+ ohash_delete(&pairs);
+
+ if (hints_flag)
+ heapify(&aux);
+
+ left = remaining.entries + aux.entries;
+ while (left != 0) {
+
+ /* Standard topological sort. */
+ while (aux.entries) {
+ struct link *l;
+ struct node *n;
+
+ n = DEQUEUE(&aux);
+ printf("%s\n", n->k);
+ left--;
+ /* We can't free nodes, as we don't know which
+ * entry we can remove in the hash table. We
+ * rely on refs == 0 to recognize live nodes.
+ * Decrease ref count of live nodes, enter new
+ * candidates into the unrefed list. */
+ for (l = n->arcs; l != NULL; l = l->next)
+ if (l->node->refs != 0 &&
+ --l->node->refs == 0) {
+ ENQUEUE(&aux, l->node);
+ }
+ }
+ /* There are still cycles to break. */
+ if (left != 0) {
+ struct node *n;
+
+ broken_cycles++;
+ /* XXX Simple cycle detection and long cycle
+ * detection are mutually exclusive. */
+
+ if (long_flag) {
+ n = find_longest_cycle(&remaining, &aux);
+ } else {
+ if (hints_flag)
+ n = find_smallest_node(&remaining);
+ else
+ n = find_good_cycle_break(&remaining);
+ if (!quiet_flag) {
+ while (!find_cycle_with(n, &aux))
+ n = find_predecessor(&remaining, n);
+ }
+ }
+
+ if (!quiet_flag) {
+ warnx("cycle in data");
+ print_cycle(&aux);
+ }
+
+ if (verbose_flag)
+ warnx("%u edge%s broken", n->refs,
+ plural(n->refs));
+ broken_arcs += n->refs;
+ n->refs = 0;
+ /* Reinitialization, cycle reporting uses aux. */
+ aux.t[0] = n;
+ aux.entries = 1;
+ }
+ }
+ if (verbose_flag && broken_cycles != 0)
+ warnx("%u cycle%s broken, for a total of %u edge%s",
+ broken_cycles, plural(broken_cycles),
+ broken_arcs, plural(broken_arcs));
+ if (warn_flag)
+ exit(broken_cycles < 256 ? broken_cycles : 255);
+ else
+ exit(EX_OK);
+ }
+}
+
+
+extern char *__progname;
+
+static void
usage()
{
- (void)fprintf(stderr, "usage: tsort [-lq] [file]\n");
- exit(1);
+ fprintf(stderr, "Usage: %s [-h file] [-flqrvw] [file]\n", __progname);
+ exit(EX_USAGE);
}