Commit ffcf498: "Add files via upload" (1 parent: 6fe19a0)

4 files changed (+674, -0 lines)

README.md

Lines changed: 74 additions & 0 deletions
# DSW

# Full list of covariates
We show the full list of static demographics and time-varying covariates of sepsis patients obtained from [MIMIC-III](https://mimic.physionet.org/).

| Category | Items | Type |
|--------------|----------------------------------------------------------|--------|
| Demographics | age | Cont. |
| | gender | Binary |
| | race (white, black, hispanic, other) | Binary |
| | metastatic cancer | Binary |
| | diabetes | Binary |
| | height | Cont. |
| | weight | Cont. |
| | BMI | Cont. |
| Vital signs | heart rate; systolic, mean, and diastolic blood pressure | Cont. |
| | respiratory rate, SpO2 | Cont. |
| | temperature | Cont. |
| Lab tests | sodium, chloride, magnesium | Cont. |
| | glucose, BUN, creatinine, urine output, GCS | Cont. |
| | white blood cell count, bands, C-reactive protein | Cont. |
| | hemoglobin, hematocrit, anion gap | Cont. |
| | platelet count, PTT, PT, INR | Cont. |
| | bicarbonate, lactate | Cont. |
# Introduction
This repository contains the source code for the paper ["Estimating Individual Treatment Effects with Time-Varying Confounders"]().

In this paper, we study the problem of estimating individual treatment effects (ITE) with time-varying confounders, as illustrated by the causal graph in the figure below.

<img src="src/Fig1.png" width=40%>

We propose Deep Sequential Weighting (DSW) for estimating ITE with time-varying confounders. DSW consists of three main components: a representation learning module, a balancing module, and a prediction module.

<img src="src/model4.png" width=80%>

To demonstrate the effectiveness of our framework, we conduct comprehensive experiments on synthetic, semi-synthetic, and real-world EMR datasets ([MIMIC-III](https://mimic.physionet.org/)). DSW outperforms state-of-the-art baselines in terms of PEHE and ATE.
# Requirements
Ubuntu 16.04, Python 3.6

Install [PyTorch 1.4](https://pytorch.org/).
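For example, with pip (an illustrative command; choose the wheel matching your CUDA version from the PyTorch site):
```
pip install torch==1.4.0
```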
# Data preprocessing
### Synthetic dataset
Simulate all covariates, treatments, and outcomes (the expected output file layout is sketched after this block):
```
cd simulation
python synthetic.py
```
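The loader `data_loader_syn.py` in this commit reads one set of `.npy` files per sample ID; a minimal sketch of that layout, with illustrative shapes (the real generator is `simulation/synthetic.py`; T and the outcome shape here are assumptions):
```
import os
import numpy as np

# Shapes follow the metadata in data_loader_syn.py: 100 time-varying
# features and 5 static features. The ID, horizon T, and outcome
# shape are illustrative, not taken from the repo.
out_dir = 'data_mymodel_new2_0.1/'
os.makedirs(out_dir, exist_ok=True)
ID, T = 0, 30
np.save(out_dir + '{}.x.npy'.format(ID), np.random.randn(T, 100))       # time-varying covariates
np.save(out_dir + '{}.static.npy'.format(ID), np.random.randn(5))       # static covariates
np.save(out_dir + '{}.a.npy'.format(ID), np.random.binomial(1, 0.5, T).astype(np.float64))  # treatments
np.save(out_dir + '{}.y.npy'.format(ID), np.random.randn(1))            # outcome
```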
### Semi-synthetic dataset
With a similar simulation process, we construct a semi-synthetic dataset based on a real-world dataset: [MIMIC-III](https://mimic.physionet.org/).
```
cd simulation
python synthetic_mimic.py
```
### MIMIC-III dataset
Obtain the patient data for two treatment-outcome pairs: (1) vasopressor-Meanbp; (2) ventilator-SpO2.
```
cd simulation
python pre_mimic.py
```
# DSW
#### Running example
```
python train_synthetic.py --observation_window 30 --epochs 64 --batch-size 128 --lr 1e-3
```

#### Outputs
- ITE estimation metrics: PEHE, ATE (see the sketch below)
- Factual prediction metric: RMSE
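For reference, a minimal sketch of how PEHE and the ATE error are conventionally computed from true and predicted individual treatment effects (the function names and the rooted form of PEHE are our assumptions; the repository's own metric code is not part of this commit):
```
import numpy as np

def pehe(ite_true, ite_pred):
    # Precision in Estimation of Heterogeneous Effect (rooted form).
    return np.sqrt(np.mean((ite_true - ite_pred) ** 2))

def ate_error(ite_true, ite_pred):
    # Absolute error on the Average Treatment Effect.
    return np.abs(np.mean(ite_true) - np.mean(ite_pred))
```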

data_loader_syn.py

Lines changed: 53 additions & 0 deletions
```
import numpy as np
import torch
from torch.utils import data

gamma = 0.1

data_dir = 'simulation/data_mymodel_new2_{}/'.format(gamma)


# dataset metadata
n_X_features = 100
n_X_static_features = 5
n_X_t_types = 1
n_classes = 1


def get_dim():
    return n_X_features, n_X_static_features, n_X_t_types, n_classes


class SyntheticDataset(data.Dataset):
    def __init__(self, list_IDs, obs_w, treatment):
        '''Initialization'''
        self.list_IDs = list_IDs
        self.obs_w = obs_w
        self.treatment = treatment

    def __len__(self):
        '''Denotes the total number of samples'''
        return len(self.list_IDs)

    def __getitem__(self, index):
        '''Generates one sample of data'''
        # Select sample
        ID = self.list_IDs[index]

        # Load labels
        label = np.load(data_dir + '{}.y.npy'.format(ID))

        # Load data
        X_demographic = np.load(data_dir + '{}.static.npy'.format(ID))
        X_all = np.load(data_dir + '{}.x.npy'.format(ID))
        X_treatment_res = np.load(data_dir + '{}.a.npy'.format(ID))

        X = torch.from_numpy(X_all.astype(np.float32))
        X_demo = torch.from_numpy(X_demographic.astype(np.float32))
        X_treatment = torch.from_numpy(X_treatment_res.astype(np.float32))
        y = torch.from_numpy(label.astype(np.float32))

        return X, X_demo, X_treatment, y
```
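A minimal usage sketch, assuming the simulated files exist under `data_dir` (the ID list and hyperparameters here are illustrative, not from the repo):
```
from torch.utils.data import DataLoader

train_ids = list(range(1000))  # hypothetical ID list
dataset = SyntheticDataset(train_ids, obs_w=30, treatment='vasopressor')
loader = DataLoader(dataset, batch_size=128, shuffle=True)

for X, X_demo, X_treatment, y in loader:
    # X: (batch, T, 100), X_demo: (batch, 5), X_treatment: (batch, T)
    break
```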

model_synthetic.py

Lines changed: 211 additions & 0 deletions
```
import torch.nn as nn
import torch
import torch.nn.functional as F


class Attn(nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat', 'concat2']:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
            self.v = nn.Parameter(torch.FloatTensor(hidden_size))

        elif self.method == 'concat2':
            self.attn = nn.Linear(self.hidden_size * 3, hidden_size)
            self.v = nn.Parameter(torch.FloatTensor(hidden_size))

    def dot_score(self, hidden, encoder_output):
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def concat_score2(self, hidden, encoder_output):
        h = torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)
        h = torch.cat((h, hidden * encoder_output), 2)
        energy = self.attn(h).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)
        elif self.method == 'concat2':
            attn_energies = self.concat_score2(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)
```
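A hypothetical shape check for `Attn` (values illustrative): the stacked past states are laid out as `(max_len, batch, hidden)`, and the returned weights have shape `(batch, 1, max_len)`, ready for `bmm` against the batch-first encoder outputs:
```
attn = Attn('dot', hidden_size=64)
hidden = torch.randn(32, 64)               # current hidden state: (batch, hidden)
encoder_outputs = torch.randn(10, 32, 64)  # past states: (max_len, batch, hidden)
weights = attn(hidden, encoder_outputs)    # (batch, 1, max_len)
context = weights.bmm(encoder_outputs.transpose(0, 1))  # (batch, 1, hidden)
```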
```
class LSTMModel(nn.Module):
    def __init__(self, n_X_features, n_X_static_features, n_X_fr_types, n_Z_confounders,
                 attn_model, n_classes, obs_w,
                 batch_size, hidden_size,
                 num_layers=2, bidirectional=True, dropout=0.2):
        super().__init__()

        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.n_X_features = n_X_features
        self.n_X_static_features = n_X_static_features
        self.n_classes = n_classes
        self.obs_w = obs_w
        self.num_layers = num_layers
        self.x_emb_size = 32
        self.x_static_emb_size = 16
        self.z_dim = n_Z_confounders

        if bidirectional:
            self.num_directions = 2
        else:
            self.num_directions = 1

        self.n_t_classes = 1

        # Factual and counterfactual GRU cells. Each step consumes the covariate
        # embedding, the previous hidden representation (so n_Z_confounders must
        # equal hidden_size), and a scalar treatment indicator.
        self.rnn_f = nn.GRUCell(input_size=self.x_emb_size + 1 + n_Z_confounders, hidden_size=hidden_size)
        self.rnn_cf = nn.GRUCell(input_size=self.x_emb_size + 1 + n_Z_confounders, hidden_size=hidden_size)

        self.attn_f = Attn(attn_model, hidden_size)
        self.concat_f = nn.Linear(hidden_size * 2, hidden_size)

        self.attn_cf = Attn(attn_model, hidden_size)
        self.concat_cf = nn.Linear(hidden_size * 2, hidden_size)

        self.x2emb = nn.Linear(n_X_features, self.x_emb_size)
        self.x_static2emb = nn.Linear(n_X_static_features, self.x_static_emb_size)

        # IPW (treatment propensity) head
        self.hidden2hidden_ipw = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(self.x_emb_size + hidden_size + self.x_static_emb_size, hidden_size),
            nn.Dropout(0.3),
            nn.ReLU(),
        )
        self.hidden2out_ipw = nn.Linear(hidden_size, self.n_t_classes, bias=False)

        # Outcome heads (factual and counterfactual)
        self.hidden2hidden_outcome_f = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(self.x_emb_size + hidden_size + self.x_static_emb_size + 1, hidden_size),
            nn.Dropout(0.3),
            nn.ReLU(),
        )
        self.hidden2out_outcome_f = nn.Linear(hidden_size, self.n_classes, bias=False)

        self.hidden2hidden_outcome_cf = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(self.x_emb_size + hidden_size + self.x_static_emb_size + 1, hidden_size),
            nn.Dropout(0.3),
            nn.ReLU(),
        )
        self.hidden2out_outcome_cf = nn.Linear(hidden_size, self.n_classes, bias=False)

    def feature_encode(self, x, x_fr):
        # Random initial hidden states for the factual and counterfactual RNNs
        f_hx = torch.randn(x.size(0), self.hidden_size)
        cf_hx = torch.randn(x.size(0), self.hidden_size)
        f_old = f_hx
        cf_old = cf_hx
        f_outputs = []
        f_zxs = []
        cf_outputs = []
        cf_zxs = []
        for i in range(x.size(1)):
            x_emb = self.x2emb(x[:, i, :])
            f_zx = torch.cat((x_emb, f_old), -1)
            f_zxs.append(f_zx)

            cf_zx = torch.cat((x_emb, cf_old), -1)
            cf_zxs.append(cf_zx)

            f_inputs = torch.cat((f_zx, x_fr[:, i].unsqueeze(1)), -1)

            # Counterfactual treatment: flip the sample's treated/control status
            cf_treatment = torch.where(x_fr.sum(1) == 0, torch.Tensor([1]), torch.Tensor([0])).unsqueeze(1)
            cf_inputs = torch.cat((cf_zx, cf_treatment), -1)

            f_hx = self.rnn_f(f_inputs, f_hx)
            cf_hx = self.rnn_cf(cf_inputs, cf_hx)

            if i == 0:
                # No history yet: attend to the current state itself
                f_concat_input = torch.cat((f_hx, f_hx), 1)
                cf_concat_input = torch.cat((cf_hx, cf_hx), 1)
            else:
                # Attend over all previous hidden states to build a context vector
                f_attn_weights = self.attn_f(f_hx, torch.stack(f_outputs))
                f_context = f_attn_weights.bmm(torch.stack(f_outputs).transpose(0, 1))
                f_context = f_context.squeeze(1)
                f_concat_input = torch.cat((f_hx, f_context), 1)

                cf_attn_weights = self.attn_cf(cf_hx, torch.stack(cf_outputs))
                cf_context = cf_attn_weights.bmm(torch.stack(cf_outputs).transpose(0, 1))
                cf_context = cf_context.squeeze(1)
                cf_concat_input = torch.cat((cf_hx, cf_context), 1)

            f_concat_output = torch.tanh(self.concat_f(f_concat_input))
            f_old = f_concat_output

            cf_concat_output = torch.tanh(self.concat_cf(cf_concat_input))
            cf_old = cf_concat_output

            f_outputs.append(f_hx)
            cf_outputs.append(cf_hx)

        return f_zxs, cf_zxs

    def forward(self, x, x_demo, x_fr):
        f_zxs, cf_zxs = self.feature_encode(x, x_fr)

        # IPW: one treatment logit per time step
        ipw_outputs = []
        x_demo_emd = self.x_static2emb(x_demo)
        for i in range(len(f_zxs)):
            h = torch.cat((f_zxs[i], x_demo_emd), -1)
            h = self.hidden2hidden_ipw(h)
            ipw_out = self.hidden2out_ipw(h)
            ipw_outputs.append(ipw_out)

        # Outcome: observed treatment indicator and its flipped counterpart
        f_treatment = torch.where(x_fr.sum(1) > 0, torch.Tensor([1]), torch.Tensor([0])).unsqueeze(1)
        cf_treatment = torch.where(x_fr.sum(1) > 0, torch.Tensor([0]), torch.Tensor([1])).unsqueeze(1)

        # Factual prediction (max-pool the per-step representations over time)
        f_zx_maxpool = torch.max(torch.stack(f_zxs), 0)

        f_hidden = torch.cat((f_zx_maxpool[0], x_demo_emd, f_treatment), -1)
        f_h = self.hidden2hidden_outcome_f(f_hidden)

        f_outcome_out = self.hidden2out_outcome_f(f_h)

        # Counterfactual prediction
        cf_zx_maxpool = torch.max(torch.stack(cf_zxs), 0)

        cf_hidden = torch.cat((cf_zx_maxpool[0], x_demo_emd, cf_treatment), -1)
        cf_h = self.hidden2hidden_outcome_cf(cf_hidden)

        cf_outcome_out = self.hidden2out_outcome_cf(cf_h)

        return ipw_outputs, f_outcome_out, cf_outcome_out, f_h
```
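A hypothetical smoke test, assuming it runs alongside `model_synthetic.py` and `data_loader_syn.py` (hyperparameters are illustrative, not from the repo; note that `hidden_size` must equal `n_Z_confounders` for the GRUCell input sizes to line up):
```
import torch
from data_loader_syn import get_dim

n_X, n_static, n_t, n_cls = get_dim()
model = LSTMModel(n_X, n_static, n_t, n_Z_confounders=32,
                  attn_model='dot', n_classes=n_cls, obs_w=30,
                  batch_size=8, hidden_size=32)

x = torch.randn(8, 30, n_X)               # (batch, time, features)
x_demo = torch.randn(8, n_static)         # static covariates
x_fr = (torch.rand(8, 30) > 0.5).float()  # binary treatment sequence

ipw_outputs, f_out, cf_out, f_h = model(x, x_demo, x_fr)
print(len(ipw_outputs), f_out.shape, cf_out.shape)  # 30, (8, 1), (8, 1)
```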
