Skip to content

Commit 0822c5b

Browse files
committed
Adding slides by Tom
1 parent f26a203 commit 0822c5b

14 files changed

+172
-149
lines changed

_quarto.yml

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ website:
1515
menu:
1616
- text: Getting Started
1717
href: getstarted.qmd
18+
- text: Diffusion Theory
19+
href: theory.qmd
1820
- text: Introduction
1921
href: intro.qmd
2022
- text: Simulations

_site.yml

-26
This file was deleted.

figs/slides-attr-exposure.png

212 KB
Loading

figs/slides-centrality-expo.png

225 KB
Loading

figs/slides-diffusion-networks.png

134 KB
Loading

figs/slides-indirect-expo.png

266 KB
Loading

figs/slides-kfamilies.png

854 KB
Loading

figs/slides-struct-equiv.png

191 KB
Loading

figs/slides-threshold-net.png

239 KB
Loading

figs/slides-toa-example.png

184 KB
Loading

figs/slides-valente-2019.png

299 KB
Loading

figs/slides-valente-socnets.png

71.5 KB
Loading

sim.qmd

+102-56
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,9 @@ plot_diffnet2(diffnet_rumor, vertex.size = dgr(diffnet_rumor)[,1], layout=pos)
114114

115115
# Diffusion
116116

117-
```{r sim-complex}
117+
```{r sim-disease}
118118
set.seed(09)
119-
diffnet_complex <- rdiffnet(
119+
diffnet_disease <- rdiffnet(
120120
seed.graph = diffnet_rumor$graph,
121121
seed.nodes = which(diffnet_rumor$toa == 1),
122122
rewire = FALSE,
@@ -127,14 +127,20 @@ diffnet_complex <- rdiffnet(
127127
128128
```
129129

130-
```{r plot-complex-and-disease}
130+
```{r plot-disease-and-disease}
131131
plot_adopters(diffnet_rumor, what = "cumadopt", include.legend = FALSE)
132-
plot_adopters(diffnet_complex, bg="tomato", add=TRUE, what = "cumadopt")
133-
legend("topleft", legend = c("Disease", "Complex"), col = c("lightblue", "tomato"),
134-
bty = "n", pch=19)
132+
plot_adopters(diffnet_disease, bg="lightblue", add=TRUE, what = "cumadopt")
133+
legend(
134+
"topleft",
135+
legend = c("Disease", "Rumor"),
136+
col = c("lightblue", "tomato"),
137+
bty = "n", pch=19
138+
)
135139
```
136140

137141

142+
# Multi-diffusion models (TBD)
143+
138144
# Mentor Matching
139145

140146
```{r mentor-match, cache = TRUE}
@@ -145,26 +151,28 @@ mentors <- mentor_matching(diffnet_rumor, 25, lead.ties.method = "random")
145151
# Simulating diffusion with these mentors
146152
set.seed(09)
147153
diffnet_mentored <- rdiffnet(
148-
seed.graph = diffnet_complex,
154+
seed.graph = diffnet_disease,
149155
seed.nodes = which(mentors$`1`$isleader),
150156
rewire = FALSE,
151-
threshold.dist = diffnet_complex[["real_threshold"]],
157+
threshold.dist = diffnet_disease[["real_threshold"]],
152158
name = "Diffusion using Mentors"
153159
)
154160
155161
summary(diffnet_mentored)
156162
```
157163

158164
```{r toa_mat-mentors}
159-
cumulative_adopt_count(diffnet_complex)
165+
cumulative_adopt_count(diffnet_disease)
160166
cumulative_adopt_count(diffnet_mentored)
161167
```
162168

163169

164170
# Example by changing threshold
165171

166-
```{r sim-sim, cache = TRUE, collapse = TRUE}
172+
The following block of code runs multiple diffnet simulations. Before we proceed, we will generate a scale-free homophilic network:
167173

174+
```{r}
175+
#| label: scale-free-homophilic
168176
# Simulating a scale-free homophilic network
169177
set.seed(1231)
170178
X <- rep(c(1,1,1,1,1,0,0,0,0,0), 50)
@@ -174,7 +182,91 @@ net <- rgraph_ba(t = 499, m=4, eta = X)
174182
ig <- igraph::graph_from_adjacency_matrix(net)
175183
plot(ig, vertex.color = c("azure", "tomato")[X+1], vertex.label = NA,
176184
vertex.size = sqrt(dgr(net)))
185+
```
186+
187+
Besides the usual parameters passed to `rdiffnet`, the `rdiffnet_multiple` function requires `R` (number of repetitions/simulations) and `statistic` (a function that returns the statistic of interest). Optionally, the user may specify the number of CPUs (`ncpus`) to run the simulations in parallel:
188+
189+
```{r rdiffnet-multiple}
190+
nsim <- 500L
191+
192+
ans_1and2 <- rdiffnet_multiple(
193+
# Num of sim
194+
R = nsim,
195+
# Statistic
196+
statistic = function(d) cumulative_adopt_count(d)["prop",],
197+
seed.graph = net,
198+
t = 10,
199+
threshold.dist = sample(1:2, 500L, TRUE),
200+
seed.nodes = "random",
201+
seed.p.adopt = .1,
202+
rewire = FALSE,
203+
exposure.args = list(outgoing=FALSE, normalized=FALSE),
204+
# Running on 4 cores
205+
ncpus = 4L
206+
) |> t()
207+
208+
ans_2and3 <- rdiffnet_multiple(
209+
# Num of sim
210+
R = nsim,
211+
# Statistic
212+
statistic = function(d) cumulative_adopt_count(d)["prop",],
213+
seed.graph = net,
214+
t = 10,
215+
threshold.dist = sample(2:3, 500, TRUE),
216+
seed.nodes = "random",
217+
seed.p.adopt = .1,
218+
rewire = FALSE,
219+
exposure.args = list(outgoing=FALSE, normalized=FALSE),
220+
# Running on 4 cores
221+
ncpus = 4L
222+
) |> t()
223+
224+
ans_1and3 <- rdiffnet_multiple(
225+
# Num of sim
226+
R = nsim,
227+
# Statistic
228+
statistic = function(d) cumulative_adopt_count(d)["prop",],
229+
seed.graph = net,
230+
t = 10,
231+
threshold.dist = sample(1:3, 500, TRUE),
232+
seed.nodes = "random",
233+
seed.p.adopt = .1,
234+
rewire = FALSE,
235+
exposure.args = list(outgoing=FALSE, normalized=FALSE),
236+
# Running on 4 cores
237+
ncpus = 4L
238+
) |> t()
239+
240+
```
241+
242+
```{r sim-sim-results}
243+
boxplot(ans_1and2, col="ivory", xlab = "Time", ylab = "Proportion of Adopters")
244+
boxplot(ans_2and3, col="tomato", add=TRUE)
245+
boxplot(ans_1and3, col = "steelblue", add=TRUE)
246+
legend(
247+
"topleft",
248+
fill = c("ivory", "tomato", "steelblue"),
249+
legend = c("1/2", "2/3", "1/3"),
250+
title = "Threshold range",
251+
bty ="n"
252+
)
253+
```
254+
255+
256+
* Example simulating a thousand networks by changing threshold levels.
257+
The final prevalence, or hazard as a function of threshold levels.
258+
259+
# Problems
260+
261+
1. Given the following types of networks: Small-world, Scale-free, Bernoulli,
262+
what set of $n$ initiators maximizes diffusion?
263+
(<a href="sim-solutions.r" target="_blank">solution script</a> and <a href="sim-solutions.png" target="_blank">solution plot</a>)
264+
265+
# Appendix
177266

267+
The following is example code that can be used to run multiple simulations, as is done by the `rdiffnet_multiple` function. We do not recommend this approach, but it may be useful for some users:
268+
269+
```{r sim-sim, cache = TRUE, collapse = TRUE}
178270
# Now, simulating a bunch of diffusion processes
179271
nsim <- 500L
180272
ans_1and2 <- vector("list", nsim)
@@ -226,49 +318,3 @@ for (i in 1:nsim) {
226318
227319
ans_2and3 <- do.call(rbind, lapply(ans_2and3, "[", i="prop", j=))
228320
```
229-
230-
This can actually be simplified by using the function `rdiffnet_multiple`. The following lines of code accomplish the same as the previous code while avoiding the for-loop (from the user's perspective). Besides the usual parameters passed to `rdiffnet`, the `rdiffnet_multiple` function requires `R` (number of repetitions/simulations) and `statistic` (a function that returns the statistic of interest). Optionally, the user may specify the number of CPUs (`ncpus`) to run the simulations in parallel:
231-
232-
```{r rdiffnet-multiple}
233-
ans_1and3 <- rdiffnet_multiple(
234-
# Num of sim
235-
R = nsim,
236-
# Statistic
237-
statistic = function(d) cumulative_adopt_count(d)["prop",],
238-
seed.graph = net,
239-
t = 10,
240-
threshold.dist = sample(1:3, 500, TRUE),
241-
seed.nodes = "random",
242-
seed.p.adopt = .1,
243-
rewire = FALSE,
244-
exposure.args = list(outgoing=FALSE, normalized=FALSE),
245-
# Running on 4 cores
246-
ncpus = 4L
247-
)
248-
249-
```
250-
251-
```{r sim-sim-results}
252-
boxplot(ans_1and2, col="ivory", xlab = "Time", ylab = "Threshold")
253-
boxplot(ans_2and3, col="tomato", add=TRUE)
254-
boxplot(t(ans_1and3), col = "steelblue", add=TRUE)
255-
legend(
256-
"topleft",
257-
fill = c("ivory", "tomato", "steelblue"),
258-
legend = c("1/2", "2/3", "1/3"),
259-
title = "Threshold range",
260-
bty ="n"
261-
)
262-
```
263-
264-
265-
* Example simulating a thousand networks by changing threshold levels.
266-
The final prevalence, or hazard as a function of threshold levels.
267-
268-
# Problems
269-
270-
1. Given the following types of networks: Small-world, Scale-free, Bernoulli,
271-
what set of $n$ initiators maximizes diffusion?
272-
(<a href="sim-solutions.r" target="_blank">solution script</a> and <a href="sim-solutions.png" target="_blank">solution plot</a>)
273-
274-

stats.qmd

+68-67
Original file line numberDiff line numberDiff line change
@@ -63,73 +63,6 @@ knitr::opts_chunk$set(comment = "#")
6363
```
6464
6565
66-
67-
# Structural dependence and permutation tests
68-
69-
70-
- A novel statistical method (work-in-progress) that allows conducting inference.
71-
- Included in the package, tests whether a particular network statistic actually depends on network structure
72-
- Suitable to be applied to network thresholds (you can't use thresholds in regression-like models!)
73-
74-
## Idea
75-
76-
- Let $\mathcal{G} = (V,E)$ be a graph, $\gamma$ a vertex attribute, and $\beta = f(\gamma,\mathcal{G})$, then
77-
78-
$$\gamma \perp \mathcal{G} \implies \mathbb{E}\left[\beta(\gamma,\mathcal{G})|\mathcal{G}\right] = \mathbb{E}\left[\beta(\gamma,\mathcal{G})\right]$$
79-
80-
- That is, if, for example, time of adoption is independent of the structure of the network, then the average threshold level will be independent of the network structure as well.
81-
82-
- Another way of looking at this is that the test allows us to see how probable it is to observe this combination of network structure and network threshold (if it is uncommon, then we say that the diffusion model is highly likely)
83-
84-
85-
## Example Not random TOA
86-
87-
- To use this test, __netdiffuseR__ has the `struct_test` function.
88-
- Basically, it simulates networks with the same density and computes a particular statistic each time, generating an EDF (Empirical Distribution Function) under the null hypothesis (p-values).
89-
90-
```{r Struct non-random-toa, cache=TRUE}
91-
# Simulating network
92-
set.seed(1123)
93-
net <- rdiffnet(n=500, t=10, seed.graph = "small-world")
94-
95-
# Running the test
96-
test <- struct_test(
97-
graph = net,
98-
statistic = function(x) mean(threshold(x), na.rm = TRUE),
99-
R = 1e3,
100-
ncpus=4, parallel="multicore"
101-
)
102-
103-
# See the output
104-
test
105-
```
106-
107-
```{r, echo=FALSE}
108-
hist(test)
109-
```
110-
111-
- Now we shuffle the TOAs so that they are random
112-
113-
```{r random-toa, cache=TRUE}
114-
# Resetting TOAs (now will be completely random)
115-
diffnet.toa(net) <- sample(diffnet.toa(net), nnodes(net), TRUE)
116-
117-
# Running the test
118-
test <- struct_test(
119-
graph = net,
120-
statistic = function(x) mean(threshold(x), na.rm = TRUE),
121-
R = 1e3,
122-
ncpus=4, parallel="multicore"
123-
)
124-
125-
# See the output
126-
test
127-
```
128-
129-
```{r, echo=FALSE}
130-
hist(test)
131-
```
132-
13366
# Regression analysis
13467
13568
* In regression analysis we want to see if exposure, once we control for other
@@ -282,3 +215,71 @@ X <- cbind(X, toa=ifelse(toa == 0, NA, toa))
282215
save(X, W, file="stats.rda")
283216
```
284217

218+
219+
# Appendix
220+
221+
## Structural dependence and permutation tests
222+
223+
224+
- A novel statistical method (work-in-progress) that allows conducting inference.
225+
- Included in the package, tests whether a particular network statistic actually depends on network structure
226+
- Suitable to be applied to network thresholds (you can't use thresholds in regression-like models!)
227+
228+
### Idea
229+
230+
- Let $\mathcal{G} = (V,E)$ be a graph, $\gamma$ a vertex attribute, and $\beta = f(\gamma,\mathcal{G})$, then
231+
232+
$$\gamma \perp \mathcal{G} \implies \mathbb{E}\left[\beta(\gamma,\mathcal{G})|\mathcal{G}\right] = \mathbb{E}\left[\beta(\gamma,\mathcal{G})\right]$$
233+
234+
- That is, if, for example, time of adoption is independent of the structure of the network, then the average threshold level will be independent of the network structure as well.
235+
236+
- Another way of looking at this is that the test allows us to see how probable it is to observe this combination of network structure and network threshold (if it is uncommon, then we say that the diffusion model is highly likely)
237+
238+
239+
### Example Not random TOA
240+
241+
- To use this test, __netdiffuseR__ has the `struct_test` function.
242+
- Basically, it simulates networks with the same density and computes a particular statistic each time, generating an EDF (Empirical Distribution Function) under the null hypothesis (p-values).
243+
244+
```{r Struct non-random-toa, cache=TRUE}
245+
# Simulating network
246+
set.seed(1123)
247+
net <- rdiffnet(n=500, t=10, seed.graph = "small-world")
248+
249+
# Running the test
250+
test <- struct_test(
251+
graph = net,
252+
statistic = function(x) mean(threshold(x), na.rm = TRUE),
253+
R = 1e3,
254+
ncpus=4, parallel="multicore"
255+
)
256+
257+
# See the output
258+
test
259+
```
260+
261+
```{r, echo=FALSE}
262+
hist(test)
263+
```
264+
265+
- Now we shuffle the TOAs so that they are random
266+
267+
```{r random-toa, cache=TRUE}
268+
# Resetting TOAs (now will be completely random)
269+
diffnet.toa(net) <- sample(diffnet.toa(net), nnodes(net), TRUE)
270+
271+
# Running the test
272+
test <- struct_test(
273+
graph = net,
274+
statistic = function(x) mean(threshold(x), na.rm = TRUE),
275+
R = 1e3,
276+
ncpus=4, parallel="multicore"
277+
)
278+
279+
# See the output
280+
test
281+
```
282+
283+
```{r, echo=FALSE}
284+
hist(test)
285+
```

0 commit comments

Comments
 (0)