element-wise multiplication with broadcasting in keras custom layer

buckithed picture buckithed · Nov 14, 2017 · Viewed 10.8k times · Source

I am creating a custom layer whose weights need to be multiplied element-wise with the input before activation. I can get it to work when the output and input have the same shape. The problem occurs when I have a first-order (rank-1) array as input and a second-order (rank-2) array as output. tensorflow.multiply supports broadcasting, but when I try to use it inside Layer.call to multiply x by the self.kernel variable, it complains that they have different shapes, saying:

ValueError: Dimensions must be equal, but are 4 and 3 for 'my_layer_1/Mul' (op: 'Mul') with input shapes: [?,4], [4,3].

here is my code:

from keras import backend as K
from keras.engine.topology import Layer
import tensorflow as tf
from keras.models import Sequential
import numpy as np

class MyLayer(Layer):
    """Custom layer that multiplies its input element-wise by a trainable kernel.

    The kernel is created with shape ``output_dims`` and initialised to ones.
    """

    def __init__(self, output_dims, **kwargs):
        # Shape used both for the kernel (see build) and as the value
        # returned by compute_output_shape.
        self.output_dims = output_dims

        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the layer's single trainable weight, ``self.kernel``."""
        # Create a trainable weight variable for this layer.
        # NOTE(review): the kernel shape comes from output_dims alone;
        # input_shape is only forwarded to the parent build().
        self.kernel = self.add_weight(name='kernel',
                                      shape=self.output_dims,
                                      initializer='ones',
                                      trainable=True)


        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        """Return the element-wise product of ``x`` and ``self.kernel``."""
        # This is the call that fails in the question: with x of shape [?, 4]
        # and a kernel of shape [4, 3] it raises
        # "ValueError: Dimensions must be equal, but are 4 and 3".
        return K.tf.multiply(x, self.kernel)

    def compute_output_shape(self, input_shape):
        """Return ``output_dims`` unchanged; ``input_shape`` is not used."""
        # The parentheses do not build a new tuple; output_dims is returned as-is.
        # NOTE(review): no batch dimension is included here -- confirm whether
        # that is intended.
        return (self.output_dims)

# Shapes: a rank-1 input of four values and the desired (4, 3) output.
inShape = (4,)
outShape = (4, 3)

# One sample (a batch of size 1) holding the values 1..4.
mInput = np.array([[1, 2, 3, 4]])

# Model made of the custom layer only.
net = Sequential()
l1 = MyLayer(outShape, input_shape=inShape)
net.add(l1)
net.compile(
    optimizer='adam',
    loss='mean_absolute_error',
    metrics=['accuracy'],
)

# Forward pass on the single sample, then show the layer's output.
p = net.predict(x=mInput, batch_size=1)
print(p)

Edit: Given input shape (4,) and output shape (4,3), the weight matrix should have the same shape as the output and be initialized with ones. So in the above code the input is [1,2,3,4], the weight matrix should be [[1,1,1,1],[1,1,1,1],[1,1,1,1]], and the output should look like [[1,2,3,4],[1,2,3,4],[1,2,3,4]] (note that, as written, these example matrices have shape (3,4): three copies of the four-element input).

Answer

Daniel Möller picture Daniel Möller · Nov 14, 2017

Before multiplying, you need to repeat the elements along a new axis so that the input has the same number of dimensions as the output. You can use K.repeat_elements for that (with import keras.backend as K).

class MyLayer(Layer):
    """Multiply the input element-wise by a trainable kernel, optionally
    replicating the input along a new axis first.

    When ``repeat_count`` is greater than 1, a new axis 1 is inserted and the
    input is repeated ``repeat_count`` times along it, so an input of shape
    ``(batch,) + dims`` yields an output of shape
    ``(batch, repeat_count) + dims``. Otherwise the shape is left unchanged.

    The kernel has the output shape with the batch axis replaced by 1 and is
    initialised to ones.
    """

    #there are some difficulties for different types of shapes
    #let's use a 'repeat_count' instead, increasing only one dimension
    def __init__(self, repeat_count, **kwargs):
        self.repeat_count = repeat_count
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable kernel from the layer's output shape."""
        #first, let's get the output_shape
        output_shape = self.compute_output_shape(input_shape)
        #replace the batch size by 1 so the kernel broadcasts over the batch
        weight_shape = (1,) + output_shape[1:]

        self.kernel = self.add_weight(name='kernel',
                                      shape=weight_shape,
                                      initializer='ones',
                                      trainable=True)

        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    #here, we need to repeat the elements before multiplying
    def call(self, x):
        """Return ``x`` (repeated along axis 1 if requested) times the kernel."""
        if self.repeat_count > 1:
            #we add the extra dimension:
            x = K.expand_dims(x, axis=1)

            #we replicate the elements
            x = K.repeat_elements(x, rep=self.repeat_count, axis=1)

        #multiply (the kernel's leading 1 broadcasts over the batch axis)
        return x * self.kernel

    #make sure we compute the output shape according to what we did in "call"
    def compute_output_shape(self, input_shape):
        """Return the output shape produced by ``call`` for ``input_shape``."""
        #coerce to a tuple so the concatenation below also works when the
        #shape arrives as a list
        input_shape = tuple(input_shape)

        if self.repeat_count > 1:
            return (input_shape[0], self.repeat_count) + input_shape[1:]
        return input_shape

    def get_config(self):
        """Return the layer config, including ``repeat_count``.

        Without this entry the constructor argument is not part of the
        serialized config, so the layer could not be re-created from it.
        """
        config = super(MyLayer, self).get_config()
        config['repeat_count'] = self.repeat_count
        return config