This repository was archived by the owner on Jan 1, 2021. It is now read-only.

Chatbot modification for compatibility with tensorflow >= 1.0 #38

Open · wants to merge 1 commit into master
1 change: 0 additions & 1 deletion assignments/chatbot/chatbot.py
```diff
@@ -19,7 +19,6 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 
 import argparse
 import os
-os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
 import random
 import sys
 import time
```
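The deleted line is unrelated to the 1.0 migration: `TF_CPP_MIN_LOG_LEVEL` only controls how chatty TensorFlow's C++ backend is. A minimal sketch if you want the quieter logs back; note the variable must be set before TensorFlow is first imported:

```python
import os

# 0 = all messages, 1 = hide INFO, 2 = hide INFO and WARNING,
# 3 = hide INFO, WARNING, and ERROR. Must be set before importing TF.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
```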
7 changes: 4 additions & 3 deletions assignments/chatbot/config.py
```diff
@@ -16,7 +16,7 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 """
 
 # parameters for processing the dataset
-DATA_PATH = '/Users/Chip/data/cornell movie-dialogs corpus'
+DATA_PATH = 'data'
 CONVO_FILE = 'movie_conversations.txt'
 LINE_FILE = 'movie_lines.txt'
 OUTPUT_FILE = 'output_convo.txt'
@@ -47,8 +47,7 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 # [37049, 33519, 30223, 33513, 37371]
 # BUCKETS = [(8, 10), (12, 14), (16, 19), (23, 26), (39, 43)]
 
-# BUCKETS = [(8, 10), (12, 14), (16, 19)]
-BUCKETS = [(16, 19)]
+BUCKETS = [(8, 10), (12, 14), (16, 19)]
 
 NUM_LAYERS = 3
 HIDDEN_SIZE = 256
@@ -58,3 +57,5 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 MAX_GRAD_NORM = 5.0
 
 NUM_SAMPLES = 512
+ENC_VOCAB = 24515
+DEC_VOCAB = 24671
```
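For context on the `BUCKETS` change: bucketing groups (question, answer) pairs by length so each batch is only padded up to its bucket's bounds, and restoring the three buckets trades a little extra graph-construction time for less wasted padding. A pair lands in the first bucket that fits both sequences; a minimal sketch (the `find_bucket` helper is illustrative, not part of the repo):

```python
BUCKETS = [(8, 10), (12, 14), (16, 19)]  # (max encoder length, max decoder length)

def find_bucket(enc_len, dec_len, buckets=BUCKETS):
    """Return the index of the first bucket that fits both lengths, else None."""
    for i, (enc_max, dec_max) in enumerate(buckets):
        if enc_len <= enc_max and dec_len <= dec_max:
            return i
    return None  # too long for every bucket; such pairs are typically skipped

assert find_bucket(5, 7) == 0    # fits the smallest bucket
assert find_bucket(10, 14) == 1  # encoder side is too long for (8, 10)
```

The hard-coded `ENC_VOCAB` and `DEC_VOCAB` presumably come from the vocabulary files that data.py generates for one particular preprocessing run, so they will differ if you rebuild the dataset.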
5 changes: 3 additions & 2 deletions assignments/chatbot/data.py
```diff
@@ -17,9 +17,9 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 """
 from __future__ import print_function
 
-import os
 import random
 import re
+import os
 
 import numpy as np
 
@@ -177,6 +177,7 @@ def prepare_raw_data():
     print('Preparing raw data into train set and test set ...')
     id2line = get_lines()
     convos = get_convos()
+    print(convos)
     questions, answers = question_answers(id2line, convos)
     prepare_dataset(questions, answers)
 
@@ -253,4 +254,4 @@ def get_batch(data_bucket, bucket_id, batch_size=1):
 
 if __name__ == '__main__':
     prepare_raw_data()
-    process_data()
+    # process_data()
```
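Two of these changes look like leftover debugging: `print(convos)` dumps every conversation to stdout, and commenting out `process_data()` means the token-id files are no longer built when the script runs, so that step presumably has to be invoked separately. For orientation, each element of `convos` is a list of line ids parsed from movie_conversations.txt. A sketch of the expected structure, assuming the standard Cornell corpus field separator (the repo's own `get_convos()` may differ in details):

```python
SEPARATOR = ' +++$+++ '  # field delimiter used by the Cornell corpus files

def get_convos_sketch(path='data/movie_conversations.txt'):
    """Each input line ends with a Python-style list of line ids, e.g.
    "u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L194', 'L195', 'L196', 'L197']".
    """
    convos = []
    with open(path, 'r', errors='ignore') as f:
        for line in f:
            ids = line.strip().split(SEPARATOR)[-1]  # "['L194', 'L195', ...]"
            convos.append([t.strip("'") for t in ids.strip('[]').split(', ')])
    return convos
```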
44 changes: 32 additions & 12 deletions assignments/chatbot/model.py
```diff
@@ -22,6 +22,8 @@ class CS 20SI: "TensorFlow for Deep Learning Research"
 import tensorflow as tf
 
 import config
+import copy
+import pdb
 
 class ChatBotModel(object):
     def __init__(self, forward_only, batch_size):
@@ -49,33 +51,51 @@ def _inference(self):
         # If we use sampled softmax, we need an output projection.
         # Sampled softmax only makes sense if we sample less than vocabulary size.
         if config.NUM_SAMPLES > 0 and config.NUM_SAMPLES < config.DEC_VOCAB:
-            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])
-            b = tf.get_variable('proj_b', [config.DEC_VOCAB])
+            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB], dtype=tf.float32)
+            w_t = tf.transpose(w)
+            b = tf.get_variable('proj_b', [config.DEC_VOCAB], dtype=tf.float32)
             self.output_projection = (w, b)
 
-        def sampled_loss(inputs, labels):
+        def sampled_loss(labels, logits):
+
             labels = tf.reshape(labels, [-1, 1])
-            return tf.nn.sampled_softmax_loss(tf.transpose(w), b, inputs, labels,
-                                              config.NUM_SAMPLES, config.DEC_VOCAB)
+            # We need to compute the sampled_softmax_loss using 32bit floats to
+            # avoid numerical instabilities.
+            local_w_t = tf.cast(w_t, tf.float32)
+            local_b = tf.cast(b, tf.float32)
+            local_inputs = tf.cast(logits, tf.float32)
+            # return tf.nn.sampled_softmax_loss(tf.transpose(w), b, labels, logits,
+            #                                   config.NUM_SAMPLES, config.DEC_VOCAB)
+            return tf.cast(
+                tf.nn.sampled_softmax_loss(
+                    weights=local_w_t,
+                    biases=local_b,
+                    labels=labels,
+                    inputs=local_inputs,
+                    num_sampled=config.NUM_SAMPLES,
+                    num_classes=config.DEC_VOCAB),
+                dtype=tf.float32)
         self.softmax_loss_function = sampled_loss
 
-        single_cell = tf.nn.rnn_cell.GRUCell(config.HIDDEN_SIZE)
-        self.cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * config.NUM_LAYERS)
+        single_cell = tf.contrib.rnn.GRUCell(config.HIDDEN_SIZE)
+        self.cell = tf.contrib.rnn.MultiRNNCell([single_cell] * config.NUM_LAYERS)
 
     def _create_loss(self):
         print('Creating loss... \nIt might take a couple of minutes depending on how many buckets you have.')
         start = time.time()
         def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
-            return tf.nn.seq2seq.embedding_attention_seq2seq(
-                encoder_inputs, decoder_inputs, self.cell,
+            tmp_cell = copy.deepcopy(self.cell)
+            return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                encoder_inputs, decoder_inputs, tmp_cell,
                 num_encoder_symbols=config.ENC_VOCAB,
                 num_decoder_symbols=config.DEC_VOCAB,
                 embedding_size=config.HIDDEN_SIZE,
                 output_projection=self.output_projection,
-                feed_previous=do_decode)
+                feed_previous=do_decode,
+                dtype=tf.float32)
 
         if self.fw_only:
-            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                 self.encoder_inputs,
                 self.decoder_inputs,
                 self.targets,
@@ -90,7 +110,7 @@ def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                 self.output_projection[0]) + self.output_projection[1]
                 for output in self.outputs[bucket]]
         else:
-            self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                 self.encoder_inputs,
                 self.decoder_inputs,
                 self.targets,
```
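The substantive migration points in this file: `tf.nn.seq2seq` moved to `tf.contrib.legacy_seq2seq` and `tf.nn.rnn_cell` to `tf.contrib.rnn` in TF 1.0, and `tf.nn.sampled_softmax_loss` switched to keyword-style arguments with `labels` before `inputs`, which is why the wrapper's parameter order flips from `(inputs, labels)` to `(labels, logits)`. A minimal standalone sketch of the TF 1.x call, with hypothetical sizes chosen for illustration:

```python
import tensorflow as tf

# Hypothetical sizes for illustration only.
HIDDEN_SIZE, VOCAB, NUM_SAMPLES, BATCH = 256, 24671, 512, 64

w = tf.get_variable('proj_w', [HIDDEN_SIZE, VOCAB], dtype=tf.float32)
b = tf.get_variable('proj_b', [VOCAB], dtype=tf.float32)

labels = tf.placeholder(tf.int32, [BATCH])                      # target word ids
decoder_out = tf.placeholder(tf.float32, [BATCH, HIDDEN_SIZE])  # pre-projection outputs

# TF 1.x expects weights shaped [num_classes, dim], hence the transpose.
loss = tf.nn.sampled_softmax_loss(
    weights=tf.transpose(w),
    biases=b,
    labels=tf.reshape(labels, [-1, 1]),
    inputs=decoder_out,
    num_sampled=NUM_SAMPLES,
    num_classes=VOCAB)
```

The `copy.deepcopy(self.cell)` inside `_seq2seq_f` is a common workaround from this era: `legacy_seq2seq` applies the same cell object once per bucket, which can trigger variable-reuse errors in TF >= 1.0. Note also that `[single_cell] * config.NUM_LAYERS` reuses one `GRUCell` object for all layers; later TF 1.x releases reject this, and the usual fix is to build a separate cell per layer, e.g. `MultiRNNCell([tf.contrib.rnn.GRUCell(config.HIDDEN_SIZE) for _ in range(config.NUM_LAYERS)])`.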