Introduction to the NetworkX Python package for network analysis
# Magic line to ensure plotting happens in Jupyter
%matplotlib inline
# pyplot is matplotlib's state-based (MATLAB-style) plotting interface
import matplotlib.pyplot as plt
# Load up the networkx package
import networkx as nx
NetworkX supports several graph constructs: undirected, directed, multigraph, and directed multigraph.
# Create an empty undirected graph; nodes and edges are added below
g = nx.Graph()
Currently, this graph `g` is empty. It has no nodes (i.e., vertices) and no edges (i.e., links).
We'll need to add nodes and edges to the graph to recreate our toy graph for analysis.
# Recreate our toy example graph: eleven nodes labelled 0 through 10
g.add_nodes_from(range(11))

# Add all fifteen edges in a single bulk call
g.add_edges_from([
    (0, 9),
    (0, 8),
    (0, 3),
    (0, 2),
    (0, 1),
    (0, 4),
    (0, 5),
    (1, 2),
    (5, 1),
    (5, 2),
    (5, 3),
    (5, 4),
    (5, 6),
    (6, 7),
    (7, 10),
])

# Report the size of the graph: node count first, then edge count
print(g.number_of_nodes())
print(g.number_of_edges())
We can use NetworkX for rudimentary drawing too.
# Draw the graph with each node's identifier shown as its label,
# then render the figure
nx.draw(g, with_labels=True)
plt.show()
It's easy to iterate through nodes and edges of a graph...
# Walk the node view and print one node per line
print("Nodes:")
for node in g.nodes():
    print("\tNode:", node)

# Walk the edge view and print one (u, v) pair per line
print("Edges:")
for edge in g.edges():
    print("\tEdge:", edge)
Adding duplicate nodes or edges to a standard `Graph` object will have no effect.
# Re-add a node and an edge that already exist in the graph
g.add_node(0)
g.add_edge(7, 10)

# Print the nodes again so the listing can be compared with the earlier one
print("Nodes:")
for node in g.nodes():
    print("\tNode:", node)

# Print the edges again for the same comparison
print("Edges:")
for edge in g.edges():
    print("\tEdge:", edge)
We can attach attributes to nodes and edges to keep track of information such as labels or weights.
# Map each node to the attributes it should receive.
# Nodes that are not listed here are left without attributes.
node_attribute_dict = {
    0: {"label": "center"},
    5: {"label": "important"},
}

# Attach the attributes to the matching nodes
nx.set_node_attributes(g, node_attribute_dict)

# Iterating the plain node view yields node identifiers only,
# so the attributes are not shown
print("Nodes without data:")
for node in g.nodes():
    print("\tNode:", node)

# Passing data=True yields (node, attribute_dict) pairs instead
print("Nodes w/ data:")
for node in g.nodes(data=True):
    print("\tNode:", node)
Doing the same for edges, we can add weights...
# Map each edge, written as a (u, v) tuple, to its attributes
edge_attribute_dict = {
    (0, 5): {"weight": 5},
    (6, 5): {"weight": 2},  # Note order is irrelevant for undirected graph
}

# Attach the attributes to the matching edges
nx.set_edge_attributes(g, edge_attribute_dict)

# Iterating the plain edge view yields (u, v) pairs only,
# so the attributes are not shown
print("Edges without data:")
for e in g.edges():
    print("\tEdge:", e)

# Passing data=True yields (u, v, attribute_dict) triples instead
print("Edges w/ data:")
for e in g.edges(data=True):
    print("\tEdge:", e)
We can also add such attributes when we create nodes and edges.
# Keyword arguments given at creation time are stored as attributes
g.add_node(11, label="extra")
g.add_edge(10, 11, weight=1)

# Show every node together with its attribute dictionary
print("Nodes:")
for node in g.nodes(data=True):
    print("\tNode:", node)

# Show every edge together with its attribute dictionary
print("Edges:")
for edge in g.edges(data=True):
    print("\tEdge:", edge)
We can also delete nodes.
# Draw the graph while node 11 is still present
nx.draw(g, with_labels=True)
plt.show()
# Remove node 11 from the graph
g.remove_node(11)
# Draw again to show the graph without node 11
nx.draw(g, with_labels=True)
plt.show()
NetworkX supports many graph metrics.
# What is the degree of each node?
# NOTE(review): this and the other bare expressions below are not printed;
# presumably each was the last line of its own notebook cell - confirm
g.degree()
# What is the diameter of the graph?
nx.diameter(g)
# What is the density of the graph?
nx.density(g)
# What's the global clustering coefficient?
print("Global Coefficient:", nx.transitivity(g))
# Local clustering coefficient (LCC) for a given node
target = 0
print("Clustering Coefficient:", nx.clustering(g, target))
# What's the average LCC?
nx.average_clustering(g)
# The LCC of a node happens to equal the density of its 1.5-degree
# ego network once the node itself has been removed
subg = nx.ego_graph(g, target, 1.5)
# Show the ego graph
nx.draw(subg, with_labels=True)
plt.show()
# Remove the target so only its neighbours and the ties among them remain
subg.remove_node(target)
# Show us the resulting graph
nx.draw(subg, with_labels=True)
plt.show()
# What's the density? Compare it with the clustering coefficient above
print("Density of 1.5-degree ego-net:", nx.density(subg))
Calculating the various centrality measures is easy.
# Degree centrality, ranked from the highest score to the lowest
centrality = nx.degree_centrality(g)
sorted(centrality.items(), key=lambda pair: pair[1], reverse=True)

# Closeness centrality, ranked the same way
centrality = nx.closeness_centrality(g)
sorted(centrality.items(), key=lambda pair: pair[1], reverse=True)

# Betweenness centrality, ranked the same way; these scores are the ones
# left in `centrality` when this block finishes
centrality = nx.betweenness_centrality(g)
sorted(centrality.items(), key=lambda pair: pair[1], reverse=True)
We can even store these centrality values as node attributes for use later...
# Store each node's score under the "centrality" attribute name
nx.set_node_attributes(g, centrality, "centrality")

# Show every node together with its attribute dictionary
print("Nodes:")
for node in g.nodes(data=True):
    print("\tNode:", node)
In general, NetworkX isn't good for visualizations, but it does have some rudimentary capabilities and layout algorithms.
# Render the same graph once per built-in drawing helper, in this order:
# default, spring, random, circular, shell and spectral
layout_drawers = (
    nx.draw,
    nx.draw_spring,
    nx.draw_random,
    nx.draw_circular,
    nx.draw_shell,
    nx.draw_spectral,
)
for draw_fn in layout_drawers:
    draw_fn(g, with_labels=True)
    plt.show()
NetworkX separates drawing from layout as well, as we see here.
# Construct node positions using the Fruchterman-Reingold (FR) layout
# (as in Gephi)
node_positions = nx.fruchterman_reingold_layout(g)
# Draw using the precomputed positions instead of letting draw() pick them
nx.draw(g, pos=node_positions, with_labels=True)
plt.show()
You can get more advanced, but maybe we should use Gephi at this point...
# Compute the positions once so both drawings below share the same layout
pos = nx.spring_layout(g, iterations=200)

# Color each node by its index in the node list (a gradient, not random)
nx.draw(g, pos, node_color=list(range(g.number_of_nodes())), node_size=800, cmap=plt.cm.Blues)
plt.show()

# Color by centrality, scaled so the most central node maps to 1.0.
# Fall back to 1.0 when every score is zero to avoid dividing by zero.
max_c = max(centrality.values()) or 1.0
color_map = {node: score / max_c for node, score in centrality.items()}
# Look the color up per node so the colors follow the graph's node order
# rather than relying on the dictionary's ordering
node_colors = [color_map[node] for node in g.nodes()]
nx.draw(g, pos, node_color=node_colors, node_size=800, cmap=plt.cm.hot)
plt.show()
Getting into Gephi brings up a good question.
How do we export a graph from NetworkX?
NetworkX has lots of reading and writing capabilities:
# For simplicity, let's use GraphML
# Writes the graph to the relative path "toy.graphml"
nx.write_graphml(g, "toy.graphml")