comp-think/2020-2021

Lecture "Organising information: graphs", exercise 1

essepuntato opened this issue · 15 comments

Consider the list of co-authors of Tim Berners-Lee as illustrated in the right box at http://dblp.uni-trier.de/pers/hd/b/Berners=Lee:Tim. Build an undirected graph that contains Tim Berners Lee as the central node, and that links to five nodes representing his top-five co-authors. Also, specify the weight of each edge as an attribute, where the value of the weight is the number of bibliographic resources (articles, proceedings, etc.) Tim Berners-Lee has co-authored with the person linked by that edge.

from networkx import Graph

# Undirected star graph: Tim Berners Lee in the centre, linked to his five
# top co-authors. Each edge weight is the number of co-authored resources.
Tim_Berners_Lee_Collaborations = Graph()

# Register the central node first, then the co-authors, in ranking order.
for person in (
    "Tim Berners Lee",
    "Tom Heath",
    "Christian Bizer",
    "Sören Auer",
    "Lalana Kagal",
    "James A. Hendler",
):
    Tim_Berners_Lee_Collaborations.add_node(person)

# (co-author, number of shared publications)
for coauthor, shared in (
    ("Tom Heath", 18),
    ("Christian Bizer", 18),
    ("Sören Auer", 10),
    ("Lalana Kagal", 9),
    ("James A. Hendler", 8),
):
    Tim_Berners_Lee_Collaborations.add_edge(
        "Tim Berners Lee", coauthor, weight=shared
    )

I was looking at the networkx documentation in order to also have some kind of visualization of the graphs. I discovered a very simple way, which requires an additional package and which uses a different way to import the networkx package, but I think that being able to visualize what is actually happening in the code may be useful.

import matplotlib.pyplot as plt
import networkx as nx

# Undirected co-authorship graph centred on Tim Berners-Lee.
Tim_BS = nx.Graph()

# No explicit add_node calls: add_edge creates any endpoint that is missing.
# The weight is the number of resources co-authored with Tim Berners-Lee.
# Co-author names are spelled as in the exercise's source list
# (previously "Sören Auerr", "Lalana Kaga" and "James A.Hendler").
Tim_BS.add_edge("Tim Berners-Lee", "Tom Heath", weight=18)
Tim_BS.add_edge("Tim Berners-Lee", "Christian Bizer", weight=18)
Tim_BS.add_edge("Tim Berners-Lee", "Sören Auer", weight=10)
Tim_BS.add_edge("Tim Berners-Lee", "Lalana Kagal", weight=9)
Tim_BS.add_edge("Tim Berners-Lee", "James A. Hendler", weight=8)

# Draw the graph with node labels on the current matplotlib axes.
nx.draw_networkx(Tim_BS)
ax = plt.gca()
ax.margins(0.20)  # extra padding around the drawing
plt.axis("off")   # hide the axis frame and ticks
plt.show()

This is the final result with the most basic settings, but there are thousands of options to customize its appearance.
graph Tim BS

from networkx import Graph

# Undirected graph of Tim Berners-Lee and his five top co-authors.
TBL_graph = Graph()

# Nodes are registered in bulk, central node first.
TBL_graph.add_nodes_from(
    [
        "Tim Berners-Lee",
        "Tom Heath",
        "Christian Bizer",
        "Sören Auer",
        "Lalana Kagal",
        "James A. Hendler",
    ]
)

# Each triple is (source, target, weight); the weight is stored under the
# default "weight" edge attribute.
TBL_graph.add_weighted_edges_from(
    [
        ("Tim Berners-Lee", "Tom Heath", 18),
        ("Tim Berners-Lee", "Christian Bizer", 18),
        ("Tim Berners-Lee", "Sören Auer", 10),
        ("Tim Berners-Lee", "Lalana Kagal", 9),
        ("Tim Berners-Lee", "James A. Hendler", 8),
    ]
)

# Show every edge together with its attribute dictionary.
print(TBL_graph.edges(data=True))

from networkx import Graph

# Label of the central node, reused for every edge below.
TBL = "Tim Berners-Lee"

TBL_coauthors = Graph()
TBL_coauthors.add_node(TBL)

# Co-author -> number of resources co-authored with TBL. The co-author nodes
# are created implicitly by add_edge, in the order listed here.
shared_publications = {
    "Tom Heath": 18,
    "Christian Bizer": 18,
    "Sören Auer": 10,
    "Lalana Kagal": 9,
    "James A. Hendler": 8,
}
for coauthor, how_many in shared_publications.items():
    TBL_coauthors.add_edge(TBL, coauthor, weight=how_many)

print(TBL_coauthors.edges(data=True))
from networkx import Graph


# Star-shaped undirected graph around Tim Berners Lee.
mygraph = Graph()

hub = "Tim Berners Lee"
top_five = ["Tom Heath", "Christian Bizer", "Soren Auer", "Lalana Kagal", "James A.Hendler"]
joint_works = [18, 18, 10, 9, 8]  # aligned position by position with top_five

# Central node first, then the co-authors in ranking order.
mygraph.add_nodes_from([hub] + top_five)

# One weighted edge per co-author.
for colleague, amount in zip(top_five, joint_works):
    mygraph.add_edge(hub, colleague, weight=amount)

from networkx import MultiGraph
from networkx import Graph

# Undirected graph with Tim Berners-Lee as the central node and his five top
# co-authors around him.
my_graph = Graph()

my_graph.add_node("Tim Berners-Lee")
my_graph.add_node("Tom Heath")
my_graph.add_node("Christian Bizer")
my_graph.add_node("Sören Auer")
my_graph.add_node("Lalana Kagal")
# The label must match the one used in add_edge below. The previous
# "James Hendler" left an isolated node, because add_edge silently created a
# separate "James A. Hendler" node.
my_graph.add_node("James A. Hendler")

# The weight is the number of resources co-authored with Tim Berners-Lee.
my_graph.add_edge("Tim Berners-Lee", "Tom Heath", weight=18)
my_graph.add_edge("Tim Berners-Lee", "Christian Bizer", weight=18)
my_graph.add_edge("Tim Berners-Lee", "Sören Auer", weight=10)
my_graph.add_edge("Tim Berners-Lee", "Lalana Kagal", weight=9)
my_graph.add_edge("Tim Berners-Lee", "James A. Hendler", weight=8)

from networkx import Graph

# Co-authorship graph: one central node plus five neighbours.
tbl_graph = Graph()

tbl_graph.add_nodes_from(
    (
        "Tim Berners Lee",
        "Tom Heath",
        "Christian Bizer",
        "Sören Auer",
        "Lalana Kagal",
        "James A. Hendler",
    )
)

# Edges are given as (u, v, attribute-dict) triples; "weight" holds the number
# of resources co-authored with Tim Berners Lee.
tbl_graph.add_edges_from(
    [
        ("Tim Berners Lee", "Tom Heath", {"weight": 18}),
        ("Tim Berners Lee", "Christian Bizer", {"weight": 18}),
        ("Tim Berners Lee", "Sören Auer", {"weight": 10}),
        ("Tim Berners Lee", "Lalana Kagal", {"weight": 9}),
        ("Tim Berners Lee", "James A. Hendler", {"weight": 8}),
    ]
)

import networkx as nx
from networkx import Graph
import matplotlib.pyplot as plt
mygraph = Graph()

# Central node followed by the five co-authors, each paired with the number
# of resources shared with Tim Berners Lee (None for the central node).
people = [
    ("Tim Berners Lee", None),
    ("Tom Heath", 18),
    ("Christian Bizer", 18),
    ("Sören Auer", 10),
    ("Lalana Kagal", 9),
    ("James A. Hendler", 8),
]

# Nodes
for label, _ in people:
    mygraph.add_node(label)

# Relations: skip the first entry, which is the central node itself.
for label, shared in people[1:]:
    mygraph.add_edge("Tim Berners Lee", label, weight=shared)

# Visualize: draw with labels, add padding, hide the axes, open the window.
nx.draw_networkx(mygraph)
ax = plt.gca()
ax.margins(0.20)
plt.axis("off")
plt.show()

Thanks to @AleRosae for the visualization method!

I spent some time perfecting my list scraping just to find out that we were supposed to only add the first 5 co-authors... I'll paste all the code I used to clean the list and create the whole graph anyway 🤦🏻‍♂️

# Raw co-author list, one "Name (count)" entry per line, where count is the
# number of resources co-authored with Tim Berners-Lee.
co_authors = '''
Tom Heath (18)
Christian Bizer (18)
Sören Auer (10)
Lalana Kagal (9)
James A. Hendler (8)
Daniel J. Weitzner (8)
Nigel Shadbolt (7)
Sarven Capadisli (6)
Roy T. Fielding (6)
Dan Connolly (6)
Wendy Hall (6)
Michael Hausenblas (5)
Henrik Frystyk Nielsen (5)
Kingsley Idehen (4)
Krzysztof Janowicz (4)
Larry Masinter (4)
Robert Cailliau (4)
M. C. schraefel (4)
Ruben Verborgh (3)
Kieron O'Hara (3)
Jean-François Groff (3)
Jens Lehmann 0001 (3)
Gerald J. Sussman (3)
Andrei Vlad Sambra (3)
Yosi Scharf (3)
Ashraf Aboulnaga (2)
Jeffrey C. Mogul (2)
Aidan Hogan (2)
Eric Prud'hommeaux (2)
Bernd Pollermann (2)
Chris Hanson (2)
James Gettys (2)
Sandro Hawke (2)
Harold Abelson (2)
Amy Guy (2)
Christoph Lange 0002 (2)
Nicholas Gibbins (2)
Peter Szolovits (1)
J. Hollenbach (1)
Raf Buyle (1)
Robert Jacobs (1)
Arthur Secret (1)
Claire Hart (1)
Ruben Taelman (1)
Ryen W. White (1)
Stefan Dietze (1)
Sunny Consolvo (1)
Tobias Kuhn (1)
Tope Omitola (1)
John Domingue (1)
V. Richard Benjamins (1)
Yang Yang (1)
Ben Shneiderman (1)
Desney S. Tan (1)
Kanghao Lu (1)
Anastasia Dimou (1)
Katrien Mostaert (1)
Erik Mannens (1)
Paul J. Leach (1)
Isaac S. Kohane (1)
Christos L. Koumenides (1)
J. Presbrey (1)
Essam Mansour (1)
Lena Mamykina (1)
Gary Marsden (1)
Luis-Daniel Ibá (1)
Maged Zereba (1)
Manuel Salvadores (1)
Maria-Esther Vidal (1)
Geroen Joris (1)
Maribel Acosta (1)
Mark Fischetti (1)
Mark P. McCahill (1)
Martin Szomszor (1)
Joan Feigenbaum (1)
Deborah L. McGuinness (1)
Hugh Glaser (1)
K. Krasnow Waterman (1)
Oshani Seneviratne (1)
Abdurrahman Ghanem (1)
Amrapali Zaveri (1)
Ari Luotonen (1)
Christopher A. Le Dantec (1)
Igor O. Popov (1)
Paul André (1)
'''

# Build the cleaned list in one pass instead of removing items from the list
# while iterating over it: that pattern skips the element following each
# removal and only worked here because the blank entries sit at both ends.
# Blank or whitespace-only lines are dropped; the rest are trimmed.
authors_list = [line.strip() for line in co_authors.splitlines() if line.strip()]
print(authors_list)


from networkx import Graph

# Undirected graph linking Tim Berners-Lee to every co-author in authors_list.
# Each entry is expected to look like "Name (count)".
g = Graph()
g.add_node("Tim Berners-Lee")
for entry in authors_list:
    # Split on the LAST " (" so names that contain parentheses or digits
    # (e.g. "Jens Lehmann 0001") stay intact, and counts of any length work.
    name, separator, tail = entry.rpartition(" (")
    count = tail[:-1]
    if not separator or not tail.endswith(")") or not count.isdigit():
        # Malformed entry: skip it rather than guess.
        continue
    g.add_node(name)
    # Store the weight as an int (it used to be a string such as "18"), so it
    # can be compared, summed or used by weighted graph algorithms.
    g.add_edge("Tim Berners-Lee", name, weight=int(count))
print(g.edges(data=True))
print(g.nodes(data=True))

from networkx import MultiGraph

# Undirected multigraph with Tim Berners Lee at the centre.
my_graph = MultiGraph()

# (label, shared publications); the central node carries no count.
roster = [
    ("Tim Berners Lee", None),
    ("Tom Heath", 18),
    ("Christian Bizer", 18),
    ("Soren Auer", 10),
    ("Lalana Kagal", 9),
    ("James A.Hendler", 8),
]

my_graph.add_nodes_from(label for label, _ in roster)

# One weighted edge from the central node to each co-author.
for label, shared in roster[1:]:
    my_graph.add_edge("Tim Berners Lee", label, weight=shared)

# Adjacency view of the central node: neighbours with their edge data.
print(my_graph.adj["Tim Berners Lee"])

from networkx import Graph

# Undirected graph (despite the variable name) with Tim Berners Lee as the
# central node and his five top co-authors as neighbours.
my_node = Graph()

my_node.add_node("Tim Berners Lee")

my_node.add_node("Tom Heath")
# Spelling fixed: the co-author is "Christian Bizer", not "Christian Bizes".
my_node.add_node("Christian Bizer")
my_node.add_node("Sören Auer")
my_node.add_node("Lalana Kagal")
my_node.add_node("James A. Hendler")

# The weight is the number of resources co-authored with Tim Berners Lee.
my_node.add_edge("Tim Berners Lee", "Tom Heath", weight=18)
my_node.add_edge("Tim Berners Lee", "Christian Bizer", weight=18)
my_node.add_edge("Tim Berners Lee", "Sören Auer", weight=10)
my_node.add_edge("Tim Berners Lee", "Lalana Kagal", weight=9)
my_node.add_edge("Tim Berners Lee", "James A. Hendler", weight=8)


# View of all edges together with their attribute dictionaries.
z = my_node.edges(data=True)
print(z)
# OUTPUT: [('Tim Berners Lee', 'Tom Heath', {'weight': 18}),
# ('Tim Berners Lee', 'Christian Bizer', {'weight': 18}),
# ('Tim Berners Lee', 'Sören Auer', {'weight': 10}),
# ('Tim Berners Lee', 'Lalana Kagal', {'weight': 9}),
# ('Tim Berners Lee', 'James A. Hendler', {'weight': 8})]

Using matplotlib to create a visualization (saved as a file in this case).
One thing I can't seem to figure out, though, is how to visualize the weights. I've spent some time looking around and playing with some of the nx.draw options, but they seem to require pos values and I can't figure them out for the life of me. The best I managed was a linear plot, which is not really what I want.

import networkx as nx
import matplotlib.pyplot as plt

# Undirected co-authorship graph centred on Tim Berners-Lee.
TBL_graph = nx.Graph()

TBL_graph.add_node("Tim Berners-Lee")
TBL_graph.add_node("Tom Heath")
TBL_graph.add_node("Christian Bizer")
TBL_graph.add_node("Sören Auer")
TBL_graph.add_node("Lalana Kagal")
TBL_graph.add_node("James A. Hendler")

# The weight is the number of resources co-authored with Tim Berners-Lee.
TBL_graph.add_edge("Tim Berners-Lee", "Tom Heath", weight=18)
TBL_graph.add_edge("Tim Berners-Lee", "Christian Bizer", weight=18)
TBL_graph.add_edge("Tim Berners-Lee", "Sören Auer", weight=10)
TBL_graph.add_edge("Tim Berners-Lee", "Lalana Kagal", weight=9)
TBL_graph.add_edge("Tim Berners-Lee", "James A. Hendler", weight=8)

print(TBL_graph.adj["Tim Berners-Lee"])

# Compute the node positions once and reuse them for both drawing calls, so
# the weight labels land on the edges they belong to. spring_layout is the
# layout nx.draw falls back to when no positions are given.
pos = nx.spring_layout(TBL_graph)
nx.draw(TBL_graph, pos=pos, with_labels=True)

# Show each edge's "weight" attribute as a label on the edge.
nx.draw_networkx_edge_labels(
    TBL_graph,
    pos,
    edge_labels=nx.get_edge_attributes(TBL_graph, "weight"),
)
plt.savefig("TBL_graph.png")

from networkx import Graph

# Graph of Tim Berners-Lee's bibliographic collaborations (top five only).
Tim_bib = Graph()

main_author = "Tim Berners-Lee"
# Co-authors in ranking order, each with the number of shared resources.
ranking = (
    ("Tom Heath", 18),
    ("Christian Bizer", 18),
    ("Sören Auer", 10),
    ("Lalana Kagal", 9),
    ("James A. Hendler", 8),
)

# Nodes: the main author first, then every co-author.
Tim_bib.add_node(main_author)
for partner, _ in ranking:
    Tim_bib.add_node(partner)

# Edges: triples of (main author, co-author, weight).
Tim_bib.add_weighted_edges_from(
    (main_author, partner, total) for partner, total in ranking
)

Is it possible to print both node and edge attributes in the same result?

from networkx import Graph

# Nodes are integer ids; the person's name is kept in node attributes.
# Node 1 is the central author, nodes 2-6 are the co-authors.
tim_graph = Graph()

tim_graph.add_nodes_from(
    [
        (1, {"name": "Tim", "surname": "Berners Lee"}),
        (2, {"name": "Tom", "surname": "Heath"}),
        (3, {"name": "Christian", "surname": "Bizer"}),
        (4, {"name": "Sören", "surname": "Auer"}),
        (5, {"name": "Lalana", "surname": "Kagal"}),
        (6, {"name": "James A.", "surname": "Hendler"}),
    ]
)

# Link node 1 to every co-author id; the weight is the number of shared
# publications.
for coauthor_id, shared in ((2, 18), (3, 18), (4, 10), (5, 9), (6, 8)):
    tim_graph.add_edge(1, coauthor_id, weight=shared)

# Node attributes first, then the neighbours of node 1 with their edge data.
print(tim_graph.nodes(data=True))
print(tim_graph.adj[1])
#Tom Heath (18)
#Christian Bizer (18)
#Sören Auer (10)
#Lalana Kagal (9)
#James A. Hendler (8)


from networkx import Graph

# Undirected graph (despite the "list" in its name) of Tim Berners Lee and
# his five top co-authors.
list_TBL = Graph()

collaborations = [
    ("Tom Heath", 18),
    ("Christian Bizer", 18),
    ("Sören Auer", 10),
    ("Lalana Kagal", 9),
    ("James A. Hendler", 8),
]

# Central node first, then the co-authors in the listed order.
list_TBL.add_nodes_from(
    ["Tim Berners Lee"] + [person for person, _ in collaborations]
)

# Attach the number of co-authored resources as the "weight" of each edge.
list_TBL.add_edges_from(
    ("Tim Berners Lee", person, {"weight": papers})
    for person, papers in collaborations
)