
Commit b6eea84

committed: updating many at once

1 parent 4fc19ff · commit b6eea84

26 files changed, +454 −22 lines

.gitignore (+2 −1)

@@ -1,3 +1,4 @@
 .idea
 *.pyc
-*~
+*~
+*.ckpt

ch02_basics/.gitignore (+1)

@@ -0,0 +1 @@
+logs

ch02_basics/gradient.py (+17)

@@ -0,0 +1,17 @@
+import tensorflow as tf
+
+def my_loss_function(var, data):
+    return tf.abs(tf.sub(var, data))
+
+def my_other_loss_function(var, data):
+    return tf.square(tf.sub(var, data))
+
+data = tf.placeholder(tf.float32)
+var = tf.Variable(1.)
+loss = my_loss_function(var, data)
+var_grad = tf.gradients(loss, [var])[0]
+
+with tf.Session() as sess:
+    sess.run(tf.initialize_all_variables())
+    var_grad_val = sess.run(var_grad, feed_dict={data: 4})
+    print(var_grad_val)
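
Editor's note: `tf.sub` and `tf.initialize_all_variables` are pre-1.0 TensorFlow names (today's `tf.subtract` and `tf.global_variables_initializer`). The gradient being computed is d|var − data|/dvar, which at var = 1, data = 4 is sign(1 − 4) = −1. For reference, a minimal sketch of the same computation with the modern eager API (TF 2.x, not the API this commit targets):

    import tensorflow as tf  # TF 2.x

    var = tf.Variable(1.)
    data = tf.constant(4.)

    with tf.GradientTape() as tape:
        loss = tf.abs(var - data)  # same loss as my_loss_function

    var_grad = tape.gradient(loss, var)
    print(var_grad.numpy())  # -1.0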

ch05_clustering/audio_clustering.py (+37 −15)

@@ -14,55 +14,77 @@
 chromo = tf.placeholder(tf.float32)
 max_freqs = tf.argmax(chromo, 0)
 
+
 def get_next_chromogram(sess):
     audio_file = sess.run(filename)
     F = Chromagram(audio_file, nfft=16384, wfft=8192, nhop=2205)
-    return F.X
+    return F.X, audio_file
+
 
 def extract_feature_vector(sess, chromo_data):
     num_features, num_samples = np.shape(chromo_data)
     freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
     hist, bins = np.histogram(freq_vals, bins=range(num_features + 1))
-    return hist.astype(float) / num_samples
+    normalized_hist = hist.astype(float) / num_samples
+    return normalized_hist
+
 
 def get_dataset(sess):
     num_files = sess.run(count_num_files)
     coord = tf.train.Coordinator()
     threads = tf.train.start_queue_runners(coord=coord)
-    xs = []
-    for i in range(num_files):
-        chromo_data = get_next_chromogram(sess)
-        x = [extract_feature_vector(sess, chromo_data)]
-        x = np.matrix(x)
-        if len(xs) == 0:
-            xs = x
-        else:
-            xs = np.vstack((xs, x))
-    return xs
+    xs = list()
+    names = list()
+    plt.figure()
+    for _ in range(num_files):
+        chromo_data, filename = get_next_chromogram(sess)
+
+        plt.subplot(1, 2, 1)
+        plt.imshow(chromo_data, cmap='Greys', interpolation='nearest')
+        plt.title('Visualization of Sound Spectrum')
+
+        plt.subplot(1, 2, 2)
+        freq_vals = sess.run(max_freqs, feed_dict={chromo: chromo_data})
+        plt.hist(freq_vals)
+        plt.title('Histogram of Notes')
+        plt.xlabel('Musical Note')
+        plt.ylabel('Count')
+        plt.savefig('{}.png'.format(filename))
+        plt.clf()
+
+        plt.clf()
+        names.append(filename)
+        x = extract_feature_vector(sess, chromo_data)
+        xs.append(x)
+    xs = np.asmatrix(xs)
+    return xs, names
+
 
 def initial_cluster_centroids(X, k):
     return X[0:k, :]
 
+
 def assign_cluster(X, centroids):
     expanded_vectors = tf.expand_dims(X, 0)
     expanded_centroids = tf.expand_dims(centroids, 1)
     distances = tf.reduce_sum(tf.square(tf.sub(expanded_vectors, expanded_centroids)), 2)
     mins = tf.argmin(distances, 0)
     return mins
 
+
 def recompute_centroids(X, Y):
     sums = tf.unsorted_segment_sum(X, Y, k)
     counts = tf.unsorted_segment_sum(tf.ones_like(X), Y, k)
     return sums / counts
 
+
 with tf.Session() as sess:
     sess.run(tf.initialize_all_variables())
-    X = get_dataset(sess)
-    print(X)
+    X, names = get_dataset(sess)
     centroids = initial_cluster_centroids(X, k)
     i, converged = 0, False
    while not converged and i < max_iterations:
         i += 1
         Y = assign_cluster(X, centroids)
         centroids = sess.run(recompute_centroids(X, Y))
-        print(centroids)
+        print(zip(sess.run(Y), names))
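
A note on the broadcasting trick in `assign_cluster`: expanding the data to shape (1, n, d) and the centroids to (k, 1, d) lets the subtraction broadcast to a (k, n, d) tensor, and reducing over the last axis yields a (k, n) matrix of squared distances, from which `argmin` picks each point's nearest centroid. A minimal NumPy sketch of the same trick (the points and centroids here are made up for illustration):

    import numpy as np

    # Hypothetical data: 4 points in 2-D, 2 centroids.
    X = np.array([[0., 0.], [0., 1.], [5., 5.], [6., 5.]])
    centroids = np.array([[0., 0.], [5., 5.]])

    expanded_vectors = X[np.newaxis, :, :]             # shape (1, 4, 2)
    expanded_centroids = centroids[:, np.newaxis, :]   # shape (2, 1, 2)

    # Broadcasts to (2, 4, 2), then reduces to a (2, 4) squared-distance matrix.
    distances = np.sum(np.square(expanded_vectors - expanded_centroids), axis=2)
    assignments = np.argmin(distances, axis=0)         # nearest centroid per point
    print(assignments)  # [0 0 1 1]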
(Several binary image files changed; contents not shown.)

ch06_planning/datastore.py (+142)

@@ -0,0 +1,142 @@
+from yahoo_finance import Share
+from matplotlib import pyplot as plt
+import numpy as np
+import random
+import tensorflow as tf
+import random
+
+
+class DecisionPolicy:
+    def select_action(self, current_state, step):
+        pass
+
+    def update_q(self, state, action, reward, next_state):
+        pass
+
+
+class RandomDecisionPolicy(DecisionPolicy):
+    def __init__(self, actions):
+        self.actions = actions
+
+    def select_action(self, current_state, step):
+        action = self.actions[random.randint(0, len(self.actions) - 1)]
+        return action
+
+
+class QLearningDecisionPolicy(DecisionPolicy):
+    def __init__(self, actions, input_dim):
+        self.epsilon = 0.9
+        self.gamma = 0.001
+        self.actions = actions
+        output_dim = len(actions)
+        h1_dim = 200
+
+        self.x = tf.placeholder(tf.float32, [None, input_dim])
+        self.y = tf.placeholder(tf.float32, [output_dim])
+        W1 = tf.Variable(tf.random_normal([input_dim, h1_dim]))
+        b1 = tf.Variable(tf.constant(0.1, shape=[h1_dim]))
+        h1 = tf.nn.relu(tf.matmul(self.x, W1) + b1)
+        W2 = tf.Variable(tf.random_normal([h1_dim, output_dim]))
+        b2 = tf.Variable(tf.constant(0.1, shape=[output_dim]))
+        self.q = tf.nn.relu(tf.matmul(h1, W2) + b2)
+
+        loss = tf.square(self.y - self.q)
+        self.train_op = tf.train.AdagradOptimizer(0.01).minimize(loss)
+        self.sess = tf.Session()
+        self.sess.run(tf.initialize_all_variables())
+
+    def select_action(self, current_state, step):
+        threshold = min(self.epsilon, step / 1000.)
+        if random.random() < threshold:
+            # Exploit best option with probability epsilon
+            action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})
+            action_idx = np.argmax(action_q_vals)  # TODO: replace w/ tensorflow's argmax
+            action = self.actions[action_idx]
+        else:
+            # Explore random option with probability 1 - epsilon
+            action = self.actions[random.randint(0, len(self.actions) - 1)]
+        return action
+
+    def update_q(self, state, action, reward, next_state):
+        action_q_vals = self.sess.run(self.q, feed_dict={self.x: state})
+        next_action_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})
+        next_action_idx = np.argmax(next_action_q_vals)
+        action_q_vals[0, next_action_idx] = reward + self.gamma * next_action_q_vals[0, next_action_idx]
+        action_q_vals = np.squeeze(np.asarray(action_q_vals))
+        self.sess.run(self.train_op, feed_dict={self.x: state, self.y: action_q_vals})
+
+
+def run_simulation(policy, initial_budget, initial_num_stocks, prices, hist, debug=False):
+    budget = initial_budget
+    num_stocks = initial_num_stocks
+    share_value = 0
+    transitions = list()
+    for i in range(len(prices) - hist - 1):
+        if i % 100 == 0:
+            print('progress {:.2f}%'.format(float(100*i) / (len(prices) - hist - 1)))
+        current_state = np.asmatrix(np.hstack((prices[i:i+hist], budget, num_stocks)))
+        current_portfolio = budget + num_stocks * share_value
+        action = policy.select_action(current_state, i)
+        share_value = float(prices[i + hist + 1])
+        if action == 'Buy' and budget >= share_value:
+            budget -= share_value
+            num_stocks += 1
+        elif action == 'Sell' and num_stocks > 0:
+            budget += share_value
+            num_stocks -= 1
+        else:
+            action = 'Hold'
+        new_portfolio = budget + num_stocks * share_value
+        reward = new_portfolio - current_portfolio
+        next_state = np.asmatrix(np.hstack((prices[i+1:i+hist+1], budget, num_stocks)))
+        transitions.append((current_state, action, reward, next_state))
+        policy.update_q(current_state, action, reward, next_state)
+
+    portfolio = budget + num_stocks * share_value
+    if debug:
+        print('${}\t{} shares'.format(budget, num_stocks))
+    return portfolio
+
+
+def run_simulations(policy, budget, num_stocks, prices, hist):
+    num_tries = 10
+    final_portfolios = list()
+    for i in range(num_tries):
+        final_portfolio = run_simulation(policy, budget, num_stocks, prices, hist)
+        final_portfolios.append(final_portfolio)
+    avg, std = np.mean(final_portfolios), np.std(final_portfolios)
+    return avg, std
+
+
+def get_prices(share_symbol, start_date, end_date, cache_filename='stock_prices.npy'):
+    try:
+        stock_prices = np.load(cache_filename)
+    except IOError:
+        share = Share(share_symbol)
+        stock_hist = share.get_historical(start_date, end_date)
+        stock_prices = [stock_price['Open'] for stock_price in stock_hist]
+        np.save(cache_filename, stock_prices)
+
+    return stock_prices
+
+
+def plot_prices(prices):
+    plt.title('Opening stock prices')
+    plt.xlabel('day')
+    plt.ylabel('price ($)')
+    plt.plot(prices)
+    plt.savefig('prices.png')
+
+
+if __name__ == '__main__':
+    prices = get_prices('MSFT', '1992-07-22', '2016-07-22')
+    plot_prices(prices)
+    actions = ['Buy', 'Sell', 'Hold']
+    hist = 200
+    # policy = RandomDecisionPolicy(actions)
+    policy = QLearningDecisionPolicy(actions, hist + 2)
+    budget = 1000.0
+    num_stocks = 0
+    avg, std = run_simulations(policy, budget, num_stocks, prices, hist)
+    print(avg, std)
+
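
For orientation, `update_q` regresses the network toward the standard Q-learning target r + γ · max_a′ Q(s′, a′) by overwriting one entry of the current Q output (note that, as committed, the overwritten entry is indexed by the next state's best action rather than the action actually taken). A stripped-down tabular sketch of the same update rule, with made-up states and a stand-in learning rate (the network version delegates the step size to Adagrad):

    import numpy as np

    # Hypothetical 3-state, 3-action Q-table standing in for the network output.
    Q = np.zeros((3, 3))
    gamma = 0.001  # same discount the commit uses
    alpha = 0.1    # illustrative learning rate

    def update_q(Q, state, action, reward, next_state):
        # Standard Q-learning target: r + gamma * max_a' Q(s', a')
        target = reward + gamma * np.max(Q[next_state])
        Q[state, action] += alpha * (target - Q[state, action])

    update_q(Q, state=0, action=2, reward=5.0, next_state=1)
    print(Q[0, 2])  # 0.5 after one step: alpha * (5.0 + gamma*0 - 0)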

ch06_planning/hmm.py (+36)

@@ -0,0 +1,36 @@
+import tensorflow as tf
+import numpy as np
+
+# Num states
+K = 2
+
+# Num observations
+N = 3
+
+# Observations
+Y = [0, 1, 2]
+
+# Prior (K)
+prior = tf.constant([0.6, 0.4], dtype=tf.double)
+
+# Transition matrix (K x K)
+T = tf.constant([[0.7, 0.3],
+                 [0.4, 0.6]], dtype=tf.double)
+
+# Emission matrix (K x N)
+B = tf.constant([[0.5, 0.4, 0.1],
+                 [0.1, 0.3, 0.6]], dtype=tf.double)
+
+# (K x T)
+T1 = tf.Variable(tf.zeros([N, K], dtype=tf.double))
+T2 = tf.Variable(tf.zeros([N, K], dtype=tf.double))
+
+with tf.Session() as sess:
+    sess.run(tf.initialize_all_variables())
+    y0 = Y[0]
+    t1_update = tf.mul(B[:, y0], prior)
+    t1_update_val = sess.run(t1_update)
+    print(np.shape(t1_update_val))
+    print(t1_update_val)
+    T1_val = tf.scatter_update(T1, [0], [[1., 1.]])
+    print(sess.run(T1_val))
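
For context, `t1_update` is the initialization step of the forward/Viterbi recursion: the entry for state k is prior[k] · B[k, y₁]. With the constants committed here, B[:, 0] = [0.5, 0.1] and prior = [0.6, 0.4], so the run should print [0.3 0.04]. A quick NumPy check:

    import numpy as np

    prior = np.array([0.6, 0.4])         # P(initial state)
    B = np.array([[0.5, 0.4, 0.1],
                  [0.1, 0.3, 0.6]])      # emission matrix (K x N)
    y0 = 0                               # first observation

    t1 = B[:, y0] * prior                # elementwise product, shape (K,)
    print(t1)                            # [0.3  0.04]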

ch06_planning/prices.png (39.6 KB, binary file not shown)

ch06_planning/stock_prices.npy (59.1 KB, binary file not shown)

ch06_planning/stock_prices_backup.npy (48 KB, binary file not shown)

ch08_autoencoder/.gitignore (+1)

@@ -0,0 +1 @@
+cifar-10-batches-py

ch08_autoencoder/README.md (+18)

@@ -0,0 +1,18 @@
+# Info
+
+Example of how to implement an autoencoder in TensorFlow
+
+# Setup
+
+* [Install TensorFlow](https://www.tensorflow.org/get_started/os_setup.html)
+
+* [Install h5py](http://docs.h5py.org/en/latest/build.html)
+
+      $ sudo apt-get install python-h5py
+
+# Run
+
+See `main.py` for API usage.
+Running it will train and test an autoencoder on the famous Iris dataset.
+
+    $ python main.py
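
For readers without the repo handy, a minimal sketch of what `main.py`-style usage might look like, assuming only the `Autoencoder(input_dim, hidden_dim, ...)` constructor and `train(data)` method visible in the `autoencoder.py` diff below; the hyperparameters and the scikit-learn Iris loader are this editor's stand-ins, not the repo's code:

    import numpy as np
    from sklearn import datasets
    from autoencoder import Autoencoder  # the class added in this commit

    # Iris: 150 samples, 4 features each.
    data = datasets.load_iris().data
    data = (data - np.mean(data, axis=0)) / np.std(data, axis=0)  # simple normalization

    # Compress 4 input dimensions down to a 1-D hidden code.
    ae = Autoencoder(input_dim=4, hidden_dim=1)
    ae.train(data)  # the class's other methods restore from ./model.ckpt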

ch08_autoencoder/autoencoder.py (+35 −6)

@@ -6,34 +6,36 @@ def get_batch(X, size):
     return X[a]
 
 class Autoencoder:
-    def __init__(self, input_dim, hidden_dim, epoch=1000, batch_size=50, learning_rate=0.001):
+    def __init__(self, input_dim, hidden_dim, epoch=1000, batch_size=10, learning_rate=0.001):
         self.epoch = epoch
         self.batch_size = batch_size
         self.learning_rate = learning_rate
 
         x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])
         with tf.name_scope('encode'):
-            weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32))
-            biases = tf.Variable(tf.zeros([hidden_dim]))
+            weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')
+            biases = tf.Variable(tf.zeros([hidden_dim]), name='biases')
             encoded = tf.nn.sigmoid(tf.matmul(x, weights) + biases)
         with tf.name_scope('decode'):
-            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32))
-            biases = tf.Variable(tf.zeros([input_dim]))
+            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')
+            biases = tf.Variable(tf.zeros([input_dim]), name='biases')
             decoded = tf.matmul(encoded, weights) + biases
 
         self.x = x
         self.encoded = encoded
         self.decoded = decoded
 
         self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded))))
+
+        self.all_loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(self.x, self.decoded)), 1))
         self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
         self.saver = tf.train.Saver()
 
     def train(self, data):
         with tf.Session() as sess:
             sess.run(tf.initialize_all_variables())
             for i in range(self.epoch):
-                for j in range(50):
+                for j in range(500):
                     batch_data = get_batch(data, self.batch_size)
                     l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data})
                 if i % 10 == 0:
@@ -55,3 +57,30 @@ def get_params(self):
             self.saver.restore(sess, './model.ckpt')
             weights, biases = sess.run([self.weights1, self.biases1])
             return weights, biases
+
+    def classify(self, data, labels):
+        with tf.Session() as sess:
+            sess.run(tf.initialize_all_variables())
+            self.saver.restore(sess, './model.ckpt')
+            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
+            reconstructed = reconstructed[0]
+            # loss = sess.run(self.all_loss, feed_dict={self.x: data})
+            print('data', np.shape(data))
+            print('reconstructed', np.shape(reconstructed))
+            loss = np.sqrt(np.mean(np.square(data - reconstructed), axis=1))
+            print('loss', np.shape(loss))
+            horse_indices = np.where(labels == 7)[0]
+            not_horse_indices = np.where(labels != 7)[0]
+            horse_loss = np.mean(loss[horse_indices])
+            not_horse_loss = np.mean(loss[not_horse_indices])
+            print('horse', horse_loss)
+            print('not horse', not_horse_loss)
+            return hidden[7,:]
+
+    def decode(self, encoding):
+        with tf.Session() as sess:
+            sess.run(tf.initialize_all_variables())
+            self.saver.restore(sess, './model.ckpt')
+            reconstructed = sess.run(self.decoded, feed_dict={self.encoded: encoding})
+            img = np.reshape(reconstructed, (32, 32))
+            return img
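
One observation on this change: `self.all_loss` keeps the per-sample RMSE (reducing over axis 1 only), which is what the new `classify` method effectively recomputes in NumPy: a reconstruction-error score that should be lower for the class the autoencoder was trained on. A standalone sketch of that scoring idea (the array shapes and threshold are illustrative assumptions, not the repo's values):

    import numpy as np

    def reconstruction_scores(data, reconstructed):
        # Per-sample RMSE: reduce over the feature axis only, one score per row.
        return np.sqrt(np.mean(np.square(data - reconstructed), axis=1))

    # Hypothetical: 4 flattened samples of 6 features each.
    data = np.random.rand(4, 6)
    reconstructed = data + 0.01 * np.random.randn(4, 6)  # near-perfect reconstruction

    scores = reconstruction_scores(data, reconstructed)
    print(scores.shape)   # (4,) - one score per sample
    print(scores < 0.05)  # low scores suggest "in-distribution" samples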
