# MLContinuous/CS229DenseModel.R at master · erayturkel/MLContinuous · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

# Simulate a design matrix with a censored continuous treatment.
#
# Draws `p` standard-normal covariates for `n` units and builds a treatment
# t = max(alpha * X1 + nu, 0) with nu ~ N(0, 1). The returned matrix holds,
# in order: the p covariates, all p^2 pairwise products X_i * X_j (i varies
# slowest, j fastest; squares and both orderings are included), then
# t, t^2 and t^3.
#
# Args:
#   n:     number of rows (units).
#   p:     number of base covariates; must be at least 1 because the first
#          covariate drives the treatment.
#   alpha: coefficient of the first covariate in the treatment equation.
#
# Returns:
#   An n x (p + p^2 + 3) numeric matrix. The third-from-last column is named
#   "t"; every other column name is the empty string.
generateData <- function(n, p, alpha) {
  covariates <- matrix(rnorm(n * p), n, p)
  nu <- rnorm(n)
  treatment <- pmax(covariates[, 1] * alpha + nu, 0)

  # Multiplying column i against the whole matrix yields the p products
  # X_i * X_1, ..., X_i * X_p at once. Binding the p blocks in order of i
  # reproduces the column order of a nested i/j loop without re-copying the
  # growing matrix p^2 times.
  interactions <- do.call(
    cbind,
    lapply(seq_len(p), function(i) covariates[, i] * covariates)
  )

  cbind(covariates, interactions, t = treatment, treatment^2, treatment^3)
}

# Draw outcome-model coefficients for a design matrix built by generateData().
#
# All coefficients start as independent N(0, 1) draws. When `sparsity` is
# positive, floor(sparsity * (ncol(X) - 3)) randomly chosen coefficients among
# the first ncol(X) - 3 columns are set to zero. The last three coefficients
# (the t, t^2, t^3 columns) are then overwritten with treatStrength,
# treatStrength / 2 and treatStrength / 8.
#
# Args:
#   treatStrength: coefficient on the linear treatment term.
#   sparsity:      fraction of the non-treatment coefficients to zero out;
#                  values <= 0 leave every coefficient non-zero.
#   X:             design matrix whose last three columns are t, t^2, t^3.
#
# Returns:
#   A numeric vector of length ncol(X).
generateCoefficients <- function(treatStrength, sparsity, X) {
  coef <- ncol(X)
  beta <- rnorm(coef)
  nCovariates <- coef - 3

  if (sparsity > 0) {
    nZero <- floor(sparsity * nCovariates)
    # seq_len() instead of 1:nZero: when nZero is 0, `1:0` is c(1, 0) and
    # would zero one coefficient even though none were requested.
    indexzero <- sample(seq_len(nCovariates))[seq_len(nZero)]
    beta[indexzero] <- 0
  }

  beta[coef - 2] <- treatStrength
  beta[coef - 1] <- 1 / 2 * treatStrength
  beta[coef] <- 1 / 8 * treatStrength
  beta
}

# Generate outcomes from a linear model with standard-normal noise.
#
# Args:
#   X:    numeric design matrix (one row per unit).
#   beta: coefficient vector with one entry per column of X.
#
# Returns:
#   An nrow(X) x 1 matrix holding X %*% beta plus independent N(0, 1) noise.
generateOutcome <- function(X, beta) {
  noise <- rnorm(nrow(X))
  signal <- X %*% beta
  outcome <- signal + noise
  return(outcome)
}
# ---- Simulation setup and exploratory plots ----
# ggplot() is used below and in the later comparison plots, so ggplot2 has to
# be attached before the first plot.
library(ggplot2)

p <- 20      # number of base covariates
trtstr <- 3  # coefficient on the linear treatment term

# generateData() takes exactly (n, p, alpha). A stray fourth argument (0.8)
# was previously passed here and made the call fail with "unused argument".
X <- generateData(5000, p, 1)
beta <- generateCoefficients(trtstr, 0, X)
Y <- generateOutcome(X, beta)

# The treatment is the third-from-last column; t^2 and t^3 follow it.
t <- X[, dim(X)[2] - 2]
plot(X[, 1], t)
plot(t, Y)
tmax <- as.numeric(quantile(t, 0.9))

# Treatment-only part of the outcome model on a grid from 0 to the 90th
# percentile of t. The weights trtstr, trtstr / 2, trtstr / 8 mirror the last
# three coefficients set by generateCoefficients().
treatGrid <- seq(0, tmax, 0.05)
trueResponse <- treatGrid * trtstr +
  treatGrid^2 * trtstr * 1 / 2 +
  treatGrid^3 * trtstr * (1 / 8)


ggplot() +
  geom_point(aes(x = t, y = Y)) +
  geom_point(aes(x = treatGrid, y = trueResponse), color = "red") +
  coord_cartesian(xlim = c(0, max(t) * 1 / 4))
hist(t)


# ---- Treatment model and propensity trimming ----
# Regress the treatment on the p base covariates without an intercept and use
# the fitted values as the conditional mean of the treatment.
linmodel <- lm(t ~ -1 + X[, 1:p])
coefsFromModel <- as.vector(coef(linmodel))
meanVector <- X[, 1:p] %*% coefsFromModel


# Propensity of the observed treatment: standard-normal density of the
# residual from the fitted mean.
propensities <- dnorm(t - meanVector)
plot(x = t, y = propensities, xlim = c(0, 10))

# Keep only units whose propensity lies strictly between 0.05 and 0.95.
aboveLower <- propensities > 0.05
belowUpper <- propensities < 0.95
Estindex <- (aboveLower & belowUpper)

EstP <- propensities[Estindex]
EstY <- Y[Estindex]
EstX <- X[Estindex, ]
EstT <- t[Estindex]


#HighOrderLinear
# Outcome regression on the treatment, the propensity, their squares and
# their interaction, fitted on the trimmed sample (EstY, EstT, EstP).
linmod <- lm(EstY ~ EstT + I(EstT^2) + EstP + I(EstP^2) + EstT * EstP)


# Estimated response curve: for every treatment level on the grid, evaluate
# the propensity of that level for all units, keep the units inside the
# trimming bounds, and average the model prediction over them.
# The data frame is built in one step from the grid, so it no longer starts
# with a placeholder (0, 0) row that would be plotted as if it were an
# estimate, and it is not grown with rbind() inside a loop.
linearGrid <- seq(0, tmax, 0.05)
linearEstimates <- vapply(linearGrid, function(treat) {
  gridP <- dnorm(rep(treat, length(Y)) - meanVector)
  gridP <- gridP[gridP > 0.05 & gridP < 0.95]
  gridFrame <- data.frame(EstT = rep(treat, length(gridP)), EstP = gridP)
  mean(predict(linmod, newdata = gridFrame))
}, numeric(1))
EstResponse <- data.frame(t = linearGrid, est = linearEstimates)


ggplot() +
  geom_point(aes(x = seq(0, tmax, 0.05), y = trueResponse), color = "red") +
  geom_point(aes(x = EstResponse$t, y = EstResponse$est), color = "blue")


##Gradient Boosting
library(gbm)

# Rebuild the trimmed estimation sample from the observed treatments.
propensities <- dnorm(t - meanVector)
Estindex <- (propensities > 0.05 & propensities < 0.95)
EstT <- t[Estindex]
EstX <- X[Estindex, ]
EstY <- Y[Estindex]
EstP <- propensities[Estindex]
EstFrame <- data.frame(
  EstY = EstY, EstT = EstT, EstP = EstP,
  EstTsq = EstT^2, EstPsq = EstP^2, Intr = EstT * EstP
)

modelGBM <- gbm(
  EstY ~ .,
  data = EstFrame,
  distribution = "gaussian",
  n.trees = 5000,
  interaction.depth = 3,
  cv.folds = 5
)
# Number of trees chosen by cross-validation.
besttrees <- gbm.perf(modelGBM, method = "cv")


# Estimated response curve: average GBM prediction over the units that pass
# the trimming rule at each treatment level on the grid.
# predict.gbm() takes the tree count as `n.trees`; the previous `num.trees`
# was not a recognised argument, so `besttrees` was never actually passed.
# The result is built in one step, without a placeholder (0, 0) first row
# and without growing the data frame via rbind() inside a loop.
gbmGrid <- seq(0, tmax, 0.05)
gbmEstimates <- vapply(gbmGrid, function(treat) {
  gridP <- dnorm(rep(treat, length(Y)) - meanVector)
  gridP <- gridP[gridP > 0.05 & gridP < 0.95]
  gridT <- rep(treat, length(gridP))
  gridFrame <- data.frame(
    EstT = gridT, EstP = gridP,
    EstTsq = gridT^2, EstPsq = gridP^2, Intr = gridT * gridP
  )
  mean(predict(modelGBM, newdata = gridFrame, n.trees = besttrees))
}, numeric(1))
EstResponseGBM <- data.frame(t = gbmGrid, est = gbmEstimates)


ggplot() +
  geom_point(aes(x = seq(0, tmax, 0.05), y = trueResponse), color = "red") +
  geom_point(aes(x = EstResponseGBM$t, y = EstResponseGBM$est),
             color = "yellow") +
  geom_point(aes(x = EstResponse$t, y = EstResponse$est), color = "blue")


#NEURAL NETS
library(neuralnet)

# Rebuild the trimmed estimation sample from the observed treatments.
propensities <- dnorm(t - meanVector)
Estindex <- (propensities > 0.05 & propensities < 0.95)
EstT <- t[Estindex]
EstX <- X[Estindex, ]
EstY <- Y[Estindex]
EstP <- propensities[Estindex]
EstFrame <- data.frame(
  EstY = EstY, EstT = EstT, EstP = EstP,
  EstTsq = EstT^2, EstPsq = EstP^2, Intr = EstT * EstP
)


modelNNET <- neuralnet(
  EstY ~ EstT + EstP + EstTsq + EstPsq + Intr,
  data = EstFrame,
  rep = 3,
  stepmax = 100000,
  threshold = 30,
  act.fct = "tanh",
  hidden = c(4, 2),
  lifesign = "full",
  linear.output = TRUE
)

# Estimated response curve: average network output over the units that pass
# the trimming rule at each treatment level on the grid.
# Only the five covariate columns are passed to compute(), in the order used
# in the model formula; the outcome column is not needed for prediction.
# The result is built in one step, without a placeholder (0, 0) first row
# and without growing the data frame via rbind() inside a loop.
nnetGrid <- seq(0, tmax, 0.05)
nnetEstimates <- vapply(nnetGrid, function(treat) {
  gridP <- dnorm(rep(treat, length(Y)) - meanVector)
  gridP <- gridP[gridP > 0.05 & gridP < 0.95]
  gridT <- rep(treat, length(gridP))
  gridCovariates <- data.frame(
    EstT = gridT, EstP = gridP,
    EstTsq = gridT^2, EstPsq = gridP^2, Intr = gridT * gridP
  )
  mean(compute(modelNNET, covariate = gridCovariates)$net.result)
}, numeric(1))
EstResponseNNET <- data.frame(t = nnetGrid, est = nnetEstimates)


ggplot() +
  geom_point(aes(x = seq(0, tmax, 0.05), y = trueResponse), color = "red") +
  geom_point(aes(x = EstResponseGBM$t, y = EstResponseGBM$est),
             color = "yellow") +
  geom_point(aes(x = EstResponse$t, y = EstResponse$est), color = "blue") +
  geom_point(aes(x = EstResponseNNET$t, y = EstResponseNNET$est),
             color = "green")