I'm having trouble using buckets in my Tensorflow model. When I run it with buckets = [(100, 100)]
, it works fine. When I run it with buckets = [(100, 100), (200, 200)]
it doesn't work at all (stacktrace at bottom).
Interestingly, running Tensorflow's Seq2Seq tutorial gives the same kind of issue with a nearly identical stacktrace. For testing purposes, the link to the repository is here.
I'm not sure what the issue is, but having more than one bucket always seems to trigger it.
This code won't work as a standalone, but this is the function where it is crashing - remember that changing buckets
from [(100, 100)]
to [(100, 100), (200, 200)]
triggers the crash.
class MySeq2Seq(object):
def __init__(self, source_vocab_size, target_vocab_size, buckets, size, num_layers, batch_size, learning_rate):
self.source_vocab_size = source_vocab_size
self.target_vocab_size = target_vocab_size
self.buckets = buckets
self.batch_size = batch_size
cell = single_cell = tf.nn.rnn_cell.GRUCell(size)
if num_layers > 1:
cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
# The seq2seq function: we use embedding for the input and attention
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
encoder_inputs, decoder_inputs, cell,
num_encoder_symbols=source_vocab_size,
num_decoder_symbols=target_vocab_size,
embedding_size=size,
feed_previous=do_decode)
# Feeds for inputs
self.encoder_inputs = []
self.decoder_inputs = []
self.target_weights = []
for i in range(buckets[-1][0]):
self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
for i in range(buckets[-1][1] + 1):
self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
self.target_weights.append(tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
# Our targets are decoder inputs shifted by one
targets = [self.decoder_inputs[i + 1] for i in range(len(self.decoder_inputs) - 1)]
self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, [(100, 100)],
lambda x, y: seq2seq_f(x, y, False))
# Gradients update operation for training the model
params = tf.trainable_variables()
self.updates = []
for b in range(len(buckets)):
self.updates.append(tf.train.AdamOptimizer(learning_rate).minimize(self.losses[b]))
self.saver = tf.train.Saver(tf.global_variables())
Stacktrace:
Traceback (most recent call last):
File "D:/Stuff/IdeaProjects/myproject/src/main.py", line 38, in <module>
model = predict.make_model(input_vocab_size, output_vocab_size, buckets, cell_size, model_layers, batch_size, learning_rate)
File "D:\Stuff\IdeaProjects\myproject\src\predictor.py", line 88, in make_model
size=cell_size, num_layers=model_layers, batch_size=batch_size, learning_rate=learning_rate)
File "D:\Stuff\IdeaProjects\myproject\src\predictor.py", line 45, in __init__
lambda x, y: seq2seq_f(x, y, False))
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\legacy_seq2seq\python\ops\seq2seq.py", line 1206, in model_with_buckets
decoder_inputs[:bucket[1]])
File "D:\Stuff\IdeaProjects\myproject\src\predictor.py", line 45, in <lambda>
lambda x, y: seq2seq_f(x, y, False))
File "D:\Stuff\IdeaProjects\myproject\src\predictor.py", line 28, in seq2seq_f
feed_previous=do_decode)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\legacy_seq2seq\python\ops\seq2seq.py", line 848, in embedding_attention_seq2seq
encoder_cell = copy.deepcopy(cell)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 161, in deepcopy
y = copier(memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 476, in __deepcopy__
setattr(result, k, copy.deepcopy(v, memo))
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 150, in deepcopy
y = copier(x, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 215, in _deepcopy_list
append(deepcopy(a, memo))
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 180, in deepcopy
y = _reconstruct(x, memo, *rv)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 280, in _reconstruct
state = deepcopy(state, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 150, in deepcopy
y = copier(x, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 240, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 180, in deepcopy
y = _reconstruct(x, memo, *rv)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 280, in _reconstruct
state = deepcopy(state, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 150, in deepcopy
y = copier(x, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 240, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 180, in deepcopy
y = _reconstruct(x, memo, *rv)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 280, in _reconstruct
state = deepcopy(state, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 150, in deepcopy
y = copier(x, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 240, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 180, in deepcopy
y = _reconstruct(x, memo, *rv)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 280, in _reconstruct
state = deepcopy(state, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 150, in deepcopy
y = copier(x, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 240, in _deepcopy_dict
y[deepcopy(key, memo)] = deepcopy(value, memo)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\copy.py", line 169, in deepcopy
rv = reductor(4)
TypeError: can't pickle _thread.lock objects
The problem is with latest changes in seq2seq.py
. Add this to your script and it will avoid deep-coping of the cells:
setattr(tf.contrib.rnn.GRUCell, '__deepcopy__', lambda self, _: self)
setattr(tf.contrib.rnn.BasicLSTMCell, '__deepcopy__', lambda self, _: self)
setattr(tf.contrib.rnn.MultiRNNCell, '__deepcopy__', lambda self, _: self)