Take-home Exercise 6

Network Analysis

LI HONGYI (SMU SCIS)https://scis.smu.edu.sg
2022-06-05

Overview

In this take-home exercise, the main task is to visualize the social network of Ohio, USA.

install packages

packages = c('igraph','tidygraph','ggraph','visNetwork',
             'lubridate','clock','tidyverse','graphlayouts')
for (p in packages){
  if(!require(p, character.only = T)){
    install.packages(p)
  }
  library(p,character.only = T)
}

import data

wrangling attribute

edge_aggregated <- 
  read_rds("data/rds/edge_aggregated.rds")
nodes_aggregated <- 
  read_rds("data/rds/nodes_aggregated.rds")

Buildling graph model

cgraph <- graph_from_data_frame(edge_aggregated,
                                vertices = nodes_aggregated) %>%
  as_tbl_graph
cgraph
# A tbl_graph: 763 nodes and 4210 edges
#
# A directed simple graph with 24 components
#
# Node Data: 763 x 3 (active)
  name  educationLevel      interestGroup
  <chr> <chr>               <chr>        
1 2     HighSchoolOrCollege A            
2 4     Bachelors           H            
3 5     HighSchoolOrCollege D            
4 6     HighSchoolOrCollege I            
5 7     Bachelors           A            
6 8     Bachelors           G            
# ... with 757 more rows
#
# Edge Data: 4,210 x 3
   from    to Weight
  <int> <int>  <int>
1     1    56    244
2     1   146    279
3     2    24    206
# ... with 4,207 more rows

Ploting the graph

social network (weight>200, color = interest group)

ggraph(cgraph,
       layout = "stress") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.1) +
  scale_edge_width(range = c(0.5, 1.5)) + 
  geom_node_point(
    aes(colour=interestGroup, size = 1)) +
  theme_graph()

ggraph(cgraph,
       layout = "nicely") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.1) +
  scale_edge_width(range = c(0.5, 1.5)) + 
  geom_node_point(
    aes(colour=interestGroup, size = 1)) +
  theme_graph()

social network (weight >200, color = education level)

ggraph(cgraph,
       layout = "nicely") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.1) +
  scale_edge_width(range = c(0.5, 1.5)) + 
  geom_node_point(
    aes(colour=educationLevel, size = 1)) +
  theme_graph()

set_graph_style()

g<- ggraph(cgraph,
           layout = "nicely") + 
  geom_edge_link(aes(width = Weight),
                 alpha = 0.2) +
  scale_edge_width(range = c(0.5, 2)) + 
  geom_node_point(
    aes(colour=interestGroup), size = 2) 
 
g + facet_nodes(~educationLevel)+ 
    th_foreground(foreground = "grey80",
                  border = TRUE)+
  theme(legend.position = "bottom")

set_graph_style()

g<- ggraph(cgraph,
           layout = "nicely") + 
  geom_edge_link(aes(width = Weight),
                 alpha = 0.2) +
  scale_edge_width(range = c(0.5, 2)) + 
  geom_node_point(
    aes(colour=educationLevel), size = 2) 
 
g + facet_nodes(~interestGroup)+ 
    th_foreground(foreground = "grey80",
                  border = TRUE)+
  theme(legend.position = "bottom")

Social Network with more frequent interactivity (>300)

participants <- 
  read_rds("data/rds/participants.rds")
edge_aggregated_less <- edge_aggregated %>%
  filter(Weight > 300)

nodes_less <- edge_aggregated_less %>% 
  select(participantIdFrom) %>%
  group_by(participantIdFrom) %>%
  summarise(Weight = n()) %>%
  select(participantIdFrom) %>% 
  rename(participantId = participantIdFrom)

nodes_aggregated_less <- merge(x = nodes_less, y = participants, by = "participantId")
cgraph_less <- graph_from_data_frame(edge_aggregated_less,
                                vertices = nodes_aggregated_less) %>%
  as_tbl_graph
cgraph_less
# A tbl_graph: 588 nodes and 2168 edges
#
# A directed simple graph with 74 components
#
# Node Data: 588 x 3 (active)
  name  educationLevel      interestGroup
  <chr> <chr>               <chr>        
1 4     Bachelors           H            
2 5     HighSchoolOrCollege D            
3 7     Bachelors           A            
4 8     Bachelors           G            
5 10    HighSchoolOrCollege D            
6 12    HighSchoolOrCollege D            
# ... with 582 more rows
#
# Edge Data: 2,168 x 3
   from    to Weight
  <int> <int>  <int>
1     1    54    355
2     1    71    315
3     1   149    397
# ... with 2,165 more rows
ggraph(cgraph_less,
       layout = "stress") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.1) +
  scale_edge_width(range = c(0.5, 1.5)) + 
  geom_node_point(
    aes(colour=interestGroup, size = 1)) +
  theme_graph()

ggraph(cgraph_less,
       layout = "nicely") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.1) +
  scale_edge_width(range = c(0.5, 1.5)) + 
  geom_node_point(
    aes(colour=educationLevel, size = 1)) +
  theme_graph()

set_graph_style()

g<- ggraph(cgraph_less,
           layout = "nicely") + 
  geom_edge_link(aes(width = Weight),
                 alpha = 0.2) +
  scale_edge_width(range = c(0.5, 2)) + 
  geom_node_point(
    aes(colour=interestGroup), size = 2) 
 
g + facet_nodes(~educationLevel)+ 
    th_foreground(foreground = "grey80",
                  border = TRUE)+
  theme(legend.position = "bottom")

Social Network with more frequent interactivity (>400)

edge_aggregated_least <- edge_aggregated %>%
  filter(Weight > 400)

nodes_least <- edge_aggregated_least %>% 
  select(participantIdFrom) %>%
  group_by(participantIdFrom) %>%
  summarise(Weight = n()) %>%
  select(participantIdFrom) %>% 
  rename(participantId = participantIdFrom)

nodes_aggregated_least <- merge(x = nodes_least, y = participants, by = "participantId")
cgraph_least <- graph_from_data_frame(edge_aggregated_least,
                                vertices = nodes_aggregated_least) %>%
  as_tbl_graph
cgraph_least
# A tbl_graph: 355 nodes and 720 edges
#
# A directed simple graph with 108 components
#
# Node Data: 355 x 3 (active)
  name  educationLevel      interestGroup
  <chr> <chr>               <chr>        
1 8     Bachelors           G            
2 10    HighSchoolOrCollege D            
3 12    HighSchoolOrCollege D            
4 14    Graduate            H            
5 18    Graduate            I            
6 20    HighSchoolOrCollege I            
# ... with 349 more rows
#
# Edge Data: 720 x 3
   from    to Weight
  <int> <int>  <int>
1     1    13    450
2     2    42    432
3     3   215    449
# ... with 717 more rows
ggraph(cgraph_least,
       layout = "stress") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.1) +
  scale_edge_width(range = c(0.5, 1.5)) + 
  geom_node_point(
    aes(colour=interestGroup, size = 1)) +
  theme_graph()

ggraph(cgraph_least,
       layout = "nicely") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.1) +
  scale_edge_width(range = c(0.5, 1.5)) + 
  geom_node_point(
    aes(colour=interestGroup, size = 1)) +
  theme_graph()