I have this question for my programming class which I have been struggling to complete for the past day ... and I have no real idea what to do.
I understand the basic concept of Prim's algorithm:
1. Start at an arbitrary node (the first node will do) and
add all of its links onto a list.
2. Add the smallest link (which doesn't duplicate an existing path)
in the MST, to the Minimum Spanning Tree.
Remove this link from the list.
3. Add all of the links from the newly linked node onto the list
4. repeat steps 2 & 3 until MST is achieved
(there are no nodes left unconnected).
I have been given this implementation of a Graph (using an Adjacency List) to implement Prim's algorithm on. The problem is I don't really understand the implementation. My understanding of the implementation so far is as follows:
Being an adjacency list, we have all the nodes in array form: Linked to this is a list of links, containing details of the weight, the destination, and a pointer to the rest of the links of the specific node:
Something that looks a bit like this:
[0] -> [weight = 1][Destination = 3] -> [weight = 6][Destination = 4][NULL]
[1] -> [weight = 4][Destination = 3][NULL]
and so on...
We also have an "Edge" struct, which I think is supposed to make things simpler for the implementation, but I'm not really seeing it.
Here is the code given:
GRAPH.h interface:
typedef struct {
int v;
int w;
int weight;
} Edge;
Edge EDGE (int, int, int);
typedef struct graph *Graph;
Graph GRAPHinit (int);
void GRAPHinsertE (Graph, Edge);
void GRAPHremoveE (Graph, Edge);
int GRAPHedges (Edge [], Graph g);
Graph GRAPHcopy (Graph);
void GRAPHdestroy (Graph);
int GRAPHedgeScan (Edge *);
void GRAPHEdgePrint (Edge);
int GRAPHsearch (Graph, int[]);
Graph GRAPHmst (Graph);
Graph GRAPHmstPrim (Graph);
#define maxV 8
GRAPH.c implementation:
#include <stdlib.h>
#include <stdio.h>
#include "GRAPH.h"
#define exch(A, B) { Edge t = A; A = B; B = t; }
#define max(A,B)(A>B?A:B)
#define min(A,B)(A<B?A:B)
typedef struct node *link;
struct node {
int v;
int weight;
link next;
};
struct graph {
int V;
int E;
link *adj;
};
static void sortEdges (Edge *edges, int noOfEdges);
static void updateConnectedComponent (Graph g, int from, int to, int newVal, int *connectedComponent);
Edge EDGE (int v, int w, int weight) {
Edge e = {v, w, weight};
return e;
}
link NEW (int v, int weight, link next) {
link x = malloc (sizeof *x);
x->v = v;
x->next = next;
x->weight = weight;
return x;
}
Graph GRAPHinit (int V) {
int v;
Graph G = malloc (sizeof *G);
// Set the size of the graph, = number of verticies
G->V = V;
G->E = 0;
G->adj = malloc (V * sizeof(link));
for (v = 0; v < V; v++){
G->adj[v] = NULL;
}
return G;
}
void GRAPHdestroy (Graph g) {
// not implemented yet
}
void GRAPHinsertE(Graph G, Edge e){
int v = e.v;
int w = e.w;
int weight = e.weight;
G->adj[v] = NEW (w, weight, G->adj[v]);
G->adj[w] = NEW (v, weight, G->adj[w]);
G->E++;
}
void GRAPHremoveE(Graph G, Edge e){
int v = e.v;
int w = e.w;
link *curr;
curr = &G->adj[w];
while (*curr != NULL){
if ((*curr)->v == v) {
(*curr) = (*curr)->next;
G->E--;
break;
}
curr= &((*curr)->next);
}
curr = &G->adj[v];
while (*curr != NULL){
if ((*curr)->v == w) {
(*curr) = (*curr)->next;
break;
}
curr= &((*curr)->next);
}
}
int GRAPHedges (Edge edges[], Graph g) {
int v, E = 0;
link t;
for (v = 0; v < g->V; v++) {
for (t = g->adj[v]; t != NULL; t = t->next) {
if (v < t->v) {
edges[E++] = EDGE(v, t->v, t->weight);
}
}
}
return E;
}
void GRAPHEdgePrint (Edge edge) {
printf ("%d -- (%d) -- %d", edge.v, edge.weight, edge.w);
}
int GRAPHedgeScan (Edge *edge) {
if (edge == NULL) {
printf ("GRAPHedgeScan: called with NULL \n");
abort();
}
if ((scanf ("%d", &(edge->v)) == 1) &&
(scanf ("%d", &(edge->w)) == 1) &&
(scanf ("%d", &(edge->weight)) == 1)) {
return 1;
} else {
return 0;
}
}
// Update the CC label for all the nodes in the MST reachable through the edge from-to
// Assumes graph is a tree, will not terminate otherwise.
void updateConnectedComponent (Graph g, int from, int to, int newVal, int *connectedComponent) {
link currLink = g->adj[to];
connectedComponent[to] = newVal;
while (currLink != NULL) {
if (currLink->v != from) {
updateConnectedComponent (g, to, currLink->v, newVal, connectedComponent);
}
currLink = currLink->next;
}
}
// insertion sort, replace with O(n * lon n) alg to get
// optimal work complexity for Kruskal
void sortEdges (Edge *edges, int noOfEdges) {
int i;
int l = 0;
int r = noOfEdges-1;
for (i = r-1; i >= l; i--) {
int j = i;
while ((j < r) && (edges[j].weight > edges[j+1].weight)) {
exch (edges[j], edges[j+1]);
j++;
}
}
}
Graph GRAPHmst (Graph g) {
Edge *edgesSorted;
int i;
int *connectedComponent = malloc (sizeof (int) * g->V);
int *sizeOfCC = malloc (sizeof (int) * g->V);
Graph mst = GRAPHinit (g->V);
edgesSorted = malloc (sizeof (*edgesSorted) * g->E);
GRAPHedges (edgesSorted, g);
sortEdges (edgesSorted, g->E);
// keep track of the connected component each vertex belongs to
// in the current MST. Initially, MST is empty, so no vertex is
// in an MST CC, therefore all are set to -1.
// We also keep track of the size of each CC, so that we're able
// to identify the CC with fewer vertices when merging two CCs
for (i = 0; i < g->V; i++) {
connectedComponent[i] = -1;
sizeOfCC[i] = 0;
}
int currentEdge = 0; // the shortest edge not yet in the mst
int mstCnt = 0; // no of edges currently in the mst
int v, w;
// The MST can have at most min (g->E, g->V-1) edges
while ((currentEdge < g->E) && (mstCnt < g->V)) {
v = edgesSorted[currentEdge].v;
w = edgesSorted[currentEdge].w;
printf ("Looking at Edge ");
GRAPHEdgePrint (edgesSorted[currentEdge]);
if ((connectedComponent[v] == -1) ||
(connectedComponent[w] == -1)) {
GRAPHinsertE (mst, edgesSorted[currentEdge]);
mstCnt++;
if (connectedComponent[v] == connectedComponent[w]) {
connectedComponent[v] = mstCnt;
connectedComponent[w] = mstCnt;
sizeOfCC[mstCnt] = 2; // initialise a new CC
} else {
connectedComponent[v] = max (connectedComponent[w], connectedComponent[v]);
connectedComponent[w] = max (connectedComponent[w], connectedComponent[v]);
sizeOfCC[connectedComponent[w]]++;
}
printf (" is in MST\n");
} else if (connectedComponent[v] == connectedComponent[w]) {
printf (" is not in MST\n");
} else {
printf (" is in MST, connecting two msts\n");
GRAPHinsertE (mst, edgesSorted[currentEdge]);
mstCnt++;
// update the CC label of all the vertices in the smaller CC
// (size is only important for performance, not correctness)
if (sizeOfCC[connectedComponent[w]] > sizeOfCC[connectedComponent[v]]) {
updateConnectedComponent (mst, v, v, connectedComponent[w], connectedComponent);
sizeOfCC[connectedComponent[w]] += sizeOfCC[connectedComponent[v]];
} else {
updateConnectedComponent (mst, w, w, connectedComponent[v], connectedComponent);
sizeOfCC[connectedComponent[v]] += sizeOfCC[connectedComponent[w]];
}
}
currentEdge++;
}
free (edgesSorted);
free (connectedComponent);
free (sizeOfCC);
return mst;
}
// my code so far
Graph GRAPHmstPrim (Graph g) {
// Initializations
Graph mst = GRAPHinit (g->V); // graph to hold the MST
int i = 0;
int nodeIsConnected[g->V];
// initially all nodes are not connected, initialize as 0;
for(i = 0; i < g->V; i++) {
nodeIsConnected[i] = 0;
}
// extract the first vertex from the graph
nodeIsConnected[0] = 1;
// push all of the links from the first node onto a temporary list
link tempList = newList();
link vertex = g->adj[0];
while(vertex != NULL) {
tempList = prepend(tempList, vertex);
vertex = vertex->next;
}
// find the smallest link from the node;
mst->adj[0] =
}
// some helper functions I've been writing
static link newList(void) {
return NULL;
}
static link prepend(link list, link node) {
link temp = list;
list = malloc(sizeof(list));
list->v = node->v;
list->weigth = node->weight;
list->next = temp;
return list;
}
static link getSmallest(link list, int nodeIsConnected[]) {
link smallest = list;
while(list != NULL){
if((list->weight < smallest->weight)&&(nodeIsConnected[list->v] == 0)) {
smallest = list;
}
list = list->next;
}
if(nodeIsConnected[smallest->v] != 0) {
return NULL;
} else {
return smallest;
}
}
For clarity, file to obtain test data from file:
#include <stdlib.h>
#include <stdio.h>
#include "GRAPH.h"
// call with graph_e1.txt as input, for example.
//
int main (int argc, char *argv[]) {
Edge e, *edges;
Graph g, mst;
int graphSize, i, noOfEdges;
if (argc < 2) {
printf ("No size provided - setting max. no of vertices to %d\n", maxV);
graphSize = maxV;
} else {
graphSize = atoi (argv[1]);
}
g = GRAPHinit (graphSize);
printf ("Reading graph edges (format: v w weight) from stdin\n");
while (GRAPHedgeScan (&e)) {
GRAPHinsertE (g, e);
}
edges = malloc (sizeof (*edges) * graphSize * graphSize);
noOfEdges = GRAPHedges (edges, g);
printf ("Edges of the graph:\n");
for (i = 0; i < noOfEdges; i++) {
GRAPHEdgePrint (edges[i]);
printf ("\n");
}
mst = GRAPHmstPrim (g);
noOfEdges = GRAPHedges (edges, mst);
printf ("\n MST \n");
for (i = 0; i < noOfEdges; i++) {
GRAPHEdgePrint (edges[i]);
printf ("\n");
}
GRAPHdestroy (g);
GRAPHdestroy (mst);
free (edges);
return EXIT_SUCCESS;
}
Thanks in advance.
Luke
files in full: http://www.cse.unsw.edu.au/~cs1927/12s2/labs/13/MST.html
UPDATE: I have had another attempt at this question. Here is the updated code (One edit above to change the graph_client.c to use "GRAPHmstPrim" function that I have written.
GRAPH_adjlist.c::
#include <stdlib.h>
#include <stdio.h>
#include "GRAPH.h"
#define exch(A, B) { Edge t = A; A = B; B = t; }
#define max(A,B)(A>B?A:B)
#define min(A,B)(A<B?A:B)
typedef struct _node *link;
struct _node {
int v;
int weight;
link next;
}node;
struct graph {
int V;
int E;
link *adj;
};
typedef struct _edgeNode *edgeLink;
struct _edgeNode {
int v;
int w;
int weight;
edgeLink next;
}edgeNode;
static void sortEdges (Edge *edges, int noOfEdges);
static void updateConnectedComponent (Graph g, int from, int to, int newVal, int *connectedComponent);
Edge EDGE (int v, int w, int weight) {
Edge e = {v, w, weight};
return e;
}
link NEW (int v, int weight, link next) {
link x = malloc (sizeof *x);
x->v = v;
x->next = next;
x->weight = weight;
return x;
}
Graph GRAPHinit (int V) {
int v;
Graph G = malloc (sizeof *G);
G->V = V;
G->E = 0;
G->adj = malloc (V * sizeof(link));
for (v = 0; v < V; v++){
G->adj[v] = NULL;
}
return G;
}
void GRAPHdestroy (Graph g) {
// not implemented yet
}
void GRAPHinsertE(Graph G, Edge e){
int v = e.v;
int w = e.w;
int weight = e.weight;
G->adj[v] = NEW (w, weight, G->adj[v]);
G->adj[w] = NEW (v, weight, G->adj[w]);
G->E++;
}
void GRAPHremoveE(Graph G, Edge e){
int v = e.v;
int w = e.w;
link *curr;
curr = &G->adj[w];
while (*curr != NULL){
if ((*curr)->v == v) {
(*curr) = (*curr)->next;
G->E--;
break;
}
curr= &((*curr)->next);
}
curr = &G->adj[v];
while (*curr != NULL){
if ((*curr)->v == w) {
(*curr) = (*curr)->next;
break;
}
curr= &((*curr)->next);
}
}
int GRAPHedges (Edge edges[], Graph g) {
int v, E = 0;
link t;
for (v = 0; v < g->V; v++) {
for (t = g->adj[v]; t != NULL; t = t->next) {
if (v < t->v) {
edges[E++] = EDGE(v, t->v, t->weight);
}
}
}
return E;
}
void GRAPHEdgePrint (Edge edge) {
printf ("%d -- (%d) -- %d", edge.v, edge.weight, edge.w);
}
int GRAPHedgeScan (Edge *edge) {
if (edge == NULL) {
printf ("GRAPHedgeScan: called with NULL \n");
abort();
}
if ((scanf ("%d", &(edge->v)) == 1) &&
(scanf ("%d", &(edge->w)) == 1) &&
(scanf ("%d", &(edge->weight)) == 1)) {
return 1;
} else {
return 0;
}
}
// Update the CC label for all the nodes in the MST reachable through the edge from-to
// Assumes graph is a tree, will not terminate otherwise.
void updateConnectedComponent (Graph g, int from, int to, int newVal, int *connectedComponent) {
link currLink = g->adj[to];
connectedComponent[to] = newVal;
while (currLink != NULL) {
if (currLink->v != from) {
updateConnectedComponent (g, to, currLink->v, newVal, connectedComponent);
}
currLink = currLink->next;
}
}
// insertion sort, replace with O(n * lon n) alg to get
// optimal work complexity for Kruskal
void sortEdges (Edge *edges, int noOfEdges) {
int i;
int l = 0;
int r = noOfEdges-1;
for (i = r-1; i >= l; i--) {
int j = i;
while ((j < r) && (edges[j].weight > edges[j+1].weight)) {
exch (edges[j], edges[j+1]);
j++;
}
}
}
Graph GRAPHmst (Graph g) {
Edge *edgesSorted;
int i;
int *connectedComponent = malloc (sizeof (int) * g->V);
int *sizeOfCC = malloc (sizeof (int) * g->V);
Graph mst = GRAPHinit (g->V);
edgesSorted = malloc (sizeof (*edgesSorted) * g->E);
GRAPHedges (edgesSorted, g);
sortEdges (edgesSorted, g->E);
// keep track of the connected component each vertex belongs to
// in the current MST. Initially, MST is empty, so no vertex is
// in an MST CC, therefore all are set to -1.
// We also keep track of the size of each CC, so that we're able
// to identify the CC with fewer vertices when merging two CCs
for (i = 0; i < g->V; i++) {
connectedComponent[i] = -1;
sizeOfCC[i] = 0;
}
int currentEdge = 0; // the shortest edge not yet in the mst
int mstCnt = 0; // no of edges currently in the mst
int v, w;
// The MST can have at most min (g->E, g->V-1) edges
while ((currentEdge < g->E) && (mstCnt < g->V)) {
v = edgesSorted[currentEdge].v;
w = edgesSorted[currentEdge].w;
printf ("Looking at Edge ");
GRAPHEdgePrint (edgesSorted[currentEdge]);
if ((connectedComponent[v] == -1) ||
(connectedComponent[w] == -1)) {
GRAPHinsertE (mst, edgesSorted[currentEdge]);
mstCnt++;
if (connectedComponent[v] == connectedComponent[w]) {
connectedComponent[v] = mstCnt;
connectedComponent[w] = mstCnt;
sizeOfCC[mstCnt] = 2; // initialise a new CC
} else {
connectedComponent[v] = max (connectedComponent[w], connectedComponent[v]);
connectedComponent[w] = max (connectedComponent[w], connectedComponent[v]);
sizeOfCC[connectedComponent[w]]++;
}
printf (" is in MST\n");
} else if (connectedComponent[v] == connectedComponent[w]) {
printf (" is not in MST\n");
} else {
printf (" is in MST, connecting two msts\n");
GRAPHinsertE (mst, edgesSorted[currentEdge]);
mstCnt++;
// update the CC label of all the vertices in the smaller CC
// (size is only important for performance, not correctness)
if (sizeOfCC[connectedComponent[w]] > sizeOfCC[connectedComponent[v]]) {
updateConnectedComponent (mst, v, v, connectedComponent[w], connectedComponent);
sizeOfCC[connectedComponent[w]] += sizeOfCC[connectedComponent[v]];
} else {
updateConnectedComponent (mst, w, w, connectedComponent[v], connectedComponent);
sizeOfCC[connectedComponent[v]] += sizeOfCC[connectedComponent[w]];
}
}
currentEdge++;
}
free (edgesSorted);
free (connectedComponent);
free (sizeOfCC);
return mst;
}
edgeLink newEdgeList(void) {
return NULL;
}
edgeLink addEdgeList(edgeLink list, int node, link edge) {
printf("EdgeListStart");
edgeLink temp = list;
list = malloc(sizeof(edgeNode));
list->w = node;
list->v = edge->v;
list->weight = edge->weight;
list->next = temp;
printf("EdgeListEnd");
return list;
}
edgeLink findSmallest(edgeLink waitList, int nodeIsConnected[]) {
printf("SmallestSTart");
edgeLink smallest = waitList;
int small = 99999;
while(waitList != NULL) {
if((waitList->weight < small)&&(nodeIsConnected[waitList->v] == 0)) {
smallest = waitList;
small = smallest->weight;
} else {
printf("\n\n smallest already used %d", waitList->v);
}
waitList = waitList->next;
}
printf("SmallestEnd");
if(nodeIsConnected[smallest->v] == 0){
return smallest;
} else {
printf("Returning NULL");
return NULL;
}
}
link addList(edgeLink smallest, link list, int v) {
printf(":istsatt");
link temp = list;
list = malloc(sizeof(node));
list->v = v;
list->weight = smallest->weight;
list->next = temp;
printf("Listend");
return list;
}
Graph GRAPHmstPrim (Graph g) {
Graph mst = GRAPHinit (g->V); // graph to hold the MST
int i = 0;
int v = 0;
int w = 0;
int nodeIsConnected[g->V]; // array to hold whether a vertex has been added to MST
int loopStarted = 0;
edgeLink smallest = NULL;
// initially all nodes are not in the MST
for(i = 0; i < g->V; i++) {
nodeIsConnected[i] = 0;
}
while((smallest != NULL)||(loopStarted == 0)) {
printf("v is : %d", v);
// add the very first node to the MST
nodeIsConnected[v] = 1;
loopStarted = 1;
// push all of its links onto the list
link vertex = g->adj[v];
edgeLink waitList = newEdgeList();
while(vertex != NULL) {
waitList = addEdgeList(waitList, v, vertex);
vertex = vertex->next;
}
// find the smallest edge from the list
// which doesn't duplicate a connection
smallest = findSmallest(waitList, nodeIsConnected);
// no nodes don't duplicate a connection
// return the current MST
if(smallest == NULL){
return mst;
}
// otherwise add the attributes to the MST graph
w = smallest->w;
v = smallest->v;
mst->adj[v] = addList(smallest, mst->adj[v], w);
mst->adj[w] = addList(smallest, mst->adj[w], v);
}
return mst;
}
Summary of changes: - Added edgeList to hold the edges that may be entered into the MST - Array nodeIsConnected[] to track whether a node is in the MST - Function to select the smallest node. If there is no node which doesn't duplicate a link this returns NULL
Seeing as this seems homework, I'm not going to give the entire answer in code. Your code seems to be on the right track. The next step you need is indeed to add the smallest link from your temporary list to to your mst. By adding the smallest one from your list, you are actually connecting your (partially built) mst to a node that is not yet in your mst. The link with the smallest weight will always be the cheapest way to connect the nodes in your mst to the other nodes.
When you add the smallest link, you are adding a node to the partially built tree and you need to update your temporary list. You need to add all the links of your new node to the list. Once you've done that, your temporary list contains all links of all nodes in your partially built mst. You continue that process of adding nodes until all nodes are in your mst.
When adding the cheapest link, you need to check if you are connecting a new node to your mst. The cheapest link could be connecting 2 nodes that are already in your mst. If so, that link needs to be skipped and you take the next cheapest one. There are actually several ways of handling this. You could maintain a set/vector of nodes that are already in your mst, maintain a vector of booleans to track the status of a node or make sure your temporary list only contains links that connect new nodes (although this is the most intensive approach).