Network Analysis
In this take-home exercise, the main task is to visualize the social network of Ohio, USA.
packages = c('igraph','tidygraph','ggraph','visNetwork',
'lubridate','clock','tidyverse','graphlayouts')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
edge_aggregated <-
read_rds("data/rds/edge_aggregated.rds")
nodes_aggregated <-
read_rds("data/rds/nodes_aggregated.rds")
cgraph <- graph_from_data_frame(edge_aggregated,
vertices = nodes_aggregated) %>%
as_tbl_graph
cgraph
# A tbl_graph: 763 nodes and 4210 edges
#
# A directed simple graph with 24 components
#
# Node Data: 763 x 3 (active)
name educationLevel interestGroup
<chr> <chr> <chr>
1 2 HighSchoolOrCollege A
2 4 Bachelors H
3 5 HighSchoolOrCollege D
4 6 HighSchoolOrCollege I
5 7 Bachelors A
6 8 Bachelors G
# ... with 757 more rows
#
# Edge Data: 4,210 x 3
from to Weight
<int> <int> <int>
1 1 56 244
2 1 146 279
3 2 24 206
# ... with 4,207 more rows
ggraph(cgraph,
layout = "stress") +
geom_edge_link(aes(width = Weight),
alpha = 0.1) +
scale_edge_width(range = c(0.5, 1.5)) +
geom_node_point(
aes(colour=interestGroup, size = 1)) +
theme_graph()
ggraph(cgraph,
layout = "nicely") +
geom_edge_link(aes(width = Weight),
alpha = 0.1) +
scale_edge_width(range = c(0.5, 1.5)) +
geom_node_point(
aes(colour=interestGroup, size = 1)) +
theme_graph()
ggraph(cgraph,
layout = "nicely") +
geom_edge_link(aes(width = Weight),
alpha = 0.1) +
scale_edge_width(range = c(0.5, 1.5)) +
geom_node_point(
aes(colour=educationLevel, size = 1)) +
theme_graph()
set_graph_style()
g<- ggraph(cgraph,
layout = "nicely") +
geom_edge_link(aes(width = Weight),
alpha = 0.2) +
scale_edge_width(range = c(0.5, 2)) +
geom_node_point(
aes(colour=interestGroup), size = 2)
g + facet_nodes(~educationLevel)+
th_foreground(foreground = "grey80",
border = TRUE)+
theme(legend.position = "bottom")
set_graph_style()
g<- ggraph(cgraph,
layout = "nicely") +
geom_edge_link(aes(width = Weight),
alpha = 0.2) +
scale_edge_width(range = c(0.5, 2)) +
geom_node_point(
aes(colour=educationLevel), size = 2)
g + facet_nodes(~interestGroup)+
th_foreground(foreground = "grey80",
border = TRUE)+
theme(legend.position = "bottom")
participants <-
read_rds("data/rds/participants.rds")
edge_aggregated_less <- edge_aggregated %>%
filter(Weight > 300)
nodes_less <- edge_aggregated_less %>%
select(participantIdFrom) %>%
group_by(participantIdFrom) %>%
summarise(Weight = n()) %>%
select(participantIdFrom) %>%
rename(participantId = participantIdFrom)
nodes_aggregated_less <- merge(x = nodes_less, y = participants, by = "participantId")
cgraph_less <- graph_from_data_frame(edge_aggregated_less,
vertices = nodes_aggregated_less) %>%
as_tbl_graph
cgraph_less
# A tbl_graph: 588 nodes and 2168 edges
#
# A directed simple graph with 74 components
#
# Node Data: 588 x 3 (active)
name educationLevel interestGroup
<chr> <chr> <chr>
1 4 Bachelors H
2 5 HighSchoolOrCollege D
3 7 Bachelors A
4 8 Bachelors G
5 10 HighSchoolOrCollege D
6 12 HighSchoolOrCollege D
# ... with 582 more rows
#
# Edge Data: 2,168 x 3
from to Weight
<int> <int> <int>
1 1 54 355
2 1 71 315
3 1 149 397
# ... with 2,165 more rows
ggraph(cgraph_less,
layout = "stress") +
geom_edge_link(aes(width = Weight),
alpha = 0.1) +
scale_edge_width(range = c(0.5, 1.5)) +
geom_node_point(
aes(colour=interestGroup, size = 1)) +
theme_graph()
ggraph(cgraph_less,
layout = "nicely") +
geom_edge_link(aes(width = Weight),
alpha = 0.1) +
scale_edge_width(range = c(0.5, 1.5)) +
geom_node_point(
aes(colour=educationLevel, size = 1)) +
theme_graph()
set_graph_style()
g<- ggraph(cgraph_less,
layout = "nicely") +
geom_edge_link(aes(width = Weight),
alpha = 0.2) +
scale_edge_width(range = c(0.5, 2)) +
geom_node_point(
aes(colour=interestGroup), size = 2)
g + facet_nodes(~educationLevel)+
th_foreground(foreground = "grey80",
border = TRUE)+
theme(legend.position = "bottom")
edge_aggregated_least <- edge_aggregated %>%
filter(Weight > 400)
nodes_least <- edge_aggregated_least %>%
select(participantIdFrom) %>%
group_by(participantIdFrom) %>%
summarise(Weight = n()) %>%
select(participantIdFrom) %>%
rename(participantId = participantIdFrom)
nodes_aggregated_least <- merge(x = nodes_least, y = participants, by = "participantId")
cgraph_least <- graph_from_data_frame(edge_aggregated_least,
vertices = nodes_aggregated_least) %>%
as_tbl_graph
cgraph_least
# A tbl_graph: 355 nodes and 720 edges
#
# A directed simple graph with 108 components
#
# Node Data: 355 x 3 (active)
name educationLevel interestGroup
<chr> <chr> <chr>
1 8 Bachelors G
2 10 HighSchoolOrCollege D
3 12 HighSchoolOrCollege D
4 14 Graduate H
5 18 Graduate I
6 20 HighSchoolOrCollege I
# ... with 349 more rows
#
# Edge Data: 720 x 3
from to Weight
<int> <int> <int>
1 1 13 450
2 2 42 432
3 3 215 449
# ... with 717 more rows
ggraph(cgraph_least,
layout = "stress") +
geom_edge_link(aes(width = Weight),
alpha = 0.1) +
scale_edge_width(range = c(0.5, 1.5)) +
geom_node_point(
aes(colour=interestGroup, size = 1)) +
theme_graph()
ggraph(cgraph_least,
layout = "nicely") +
geom_edge_link(aes(width = Weight),
alpha = 0.1) +
scale_edge_width(range = c(0.5, 1.5)) +
geom_node_point(
aes(colour=interestGroup, size = 1)) +
theme_graph()