I am creating a custom layer with weights that need to be multiplied by element-wise before activation. I can get it to work when the output and input is the same shape. The problem occurs when I have a first order array as input with a second order array as output. tensorflow.multiply supports broadcasting, but when I try to use it in Layer.call(x, self.kernel) to multiply x by the self.kernel Variable it complains that they are different shapes saying:
ValueError: Dimensions must be equal, but are 4 and 3 for 'my_layer_1/Mul' (op: 'Mul') with input shapes: [?,4], [4,3].
here is my code:
from keras import backend as K
from keras.engine.topology import Layer
import tensorflow as tf
from keras.models import Sequential
import numpy as np
class MyLayer(Layer):
def __init__(self, output_dims, **kwargs):
self.output_dims = output_dims
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
self.kernel = self.add_weight(name='kernel',
shape=self.output_dims,
initializer='ones',
trainable=True)
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
def call(self, x):
#multiply wont work here?
return K.tf.multiply(x, self.kernel)
def compute_output_shape(self, input_shape):
return (self.output_dims)
mInput = np.array([[1,2,3,4]])
inShape = (4,)
net = Sequential()
outShape = (4,3)
l1 = MyLayer(outShape, input_shape= inShape)
net.add(l1)
net.compile(loss='mean_absolute_error', optimizer='adam', metrics=['accuracy'])
p = net.predict(x=mInput, batch_size=1)
print(p)
Edit: Given input shape (4,) and output shape (4,3) the weight matrix should be the same shape as the output and initialized with ones. So in the above code the input is [1,2,3,4], the weight matrix should be [[1,1,1,1],[1,1,1,1],[1,1,1,1]] and the output should look like [[1,2,3,4],[1,2,3,4],[1,2,3,4]]
Before multiplying, you need to repeat the elements to increase the shape.
You can use K.repeat_elements
for that. (import keras.backend as K
)
class MyLayer(Layer):
#there are some difficulties for different types of shapes
#let's use a 'repeat_count' instead, increasing only one dimension
def __init__(self, repeat_count,**kwargs):
self.repeat_count = repeat_count
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
#first, let's get the output_shape
output_shape = self.compute_output_shape(input_shape)
weight_shape = (1,) + output_shape[1:] #replace the batch size by 1
self.kernel = self.add_weight(name='kernel',
shape=weight_shape,
initializer='ones',
trainable=True)
super(MyLayer, self).build(input_shape) # Be sure to call this somewhere!
#here, we need to repeat the elements before multiplying
def call(self, x):
if self.repeat_count > 1:
#we add the extra dimension:
x = K.expand_dims(x, axis=1)
#we replicate the elements
x = K.repeat_elements(x, rep=self.repeat_count, axis=1)
#multiply
return x * self.kernel
#make sure we comput the ouptut shape according to what we did in "call"
def compute_output_shape(self, input_shape):
if self.repeat_count > 1:
return (input_shape[0],self.repeat_count) + input_shape[1:]
else:
return input_shape