I'm able to compute the gradient of the predicted class (class 0) with respect to the input samples in the mini-batch. Here are the relevant parts of my code:
import theano.tensor as T
import numpy as np
import lasagne as nn
import importlib
import theano
from global_vars import *
theano.config.floatX = 'float32'
seq_len = 19
num_features = 42
config_name = 'pureConv'
config_initialize(config_name)
metadata_path = "metadata/pureConv/dump_pureConv-20230429-160908-223.pkl"
metadata = np.load(metadata_path, allow_pickle=True)
config = importlib.import_module("configurations.%s" % config_name)
params = np.array(metadata['param_values'])
l_in, l_out = config.build_model()
nn.layers.set_all_param_values(l_out, metadata['param_values'])
all_layers = nn.layers.get_all_layers(l_out)
for i, layer in enumerate(all_layers):
    name = layer.__class__.__name__
    print(" layer %d: %s %s %s" % (i, name, nn.layers.get_output_shape(layer), nn.layers.count_params(layer)))
layer_name = all_layers[34]  # layer 34 in the listing below: a Conv1DLayer
sym_x = T.tensor3()
conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
nn_output = nn.layers.get_output(l_out, sym_x, deterministic=True) #softmax output
grads = theano.gradient.jacobian(nn_output[:, 0], wrt=sym_x)
res = theano.function(inputs=[sym_x], outputs=[nn_output, conv_output, grads], allow_input_downcast=True)
input_data = np.random.random((64, seq_len, num_features))
out, conv, grads = res(input_data)
print("Model output shape", out.shape)
print("Conv output shape", conv.shape)
print("Gradients out shape", grads.shape)
This outputs:
layer 0: InputLayer (None, 19, 42) 0
layer 1: DimshuffleLayer (None, 42, 19) 0
layer 2: Conv1DLayer (None, 16, 19) 2016
layer 3: BatchNormLayer (None, 16, 19) 2080
layer 4: NonlinearityLayer (None, 16, 19) 2080
layer 5: Conv1DLayer (None, 16, 19) 3360
layer 6: BatchNormLayer (None, 16, 19) 3424
layer 7: NonlinearityLayer (None, 16, 19) 3424
layer 8: Conv1DLayer (None, 16, 19) 4704
layer 9: BatchNormLayer (None, 16, 19) 4768
layer 10: NonlinearityLayer (None, 16, 19) 4768
layer 11: ConcatLayer (None, 48, 19) 10272
layer 12: DimshuffleLayer (None, 19, 48) 10272
layer 13: ConcatLayer (None, 19, 90) 10272
layer 14: DimshuffleLayer (None, 90, 19) 10272
layer 15: Conv1DLayer (None, 16, 19) 14592
layer 16: BatchNormLayer (None, 16, 19) 14656
layer 17: NonlinearityLayer (None, 16, 19) 14656
layer 18: Conv1DLayer (None, 16, 19) 17472
layer 19: BatchNormLayer (None, 16, 19) 17536
layer 20: NonlinearityLayer (None, 16, 19) 17536
layer 21: Conv1DLayer (None, 16, 19) 20352
layer 22: BatchNormLayer (None, 16, 19) 20416
layer 23: NonlinearityLayer (None, 16, 19) 20416
layer 24: ConcatLayer (None, 48, 19) 32064
layer 25: DimshuffleLayer (None, 19, 48) 32064
layer 26: ConcatLayer (None, 19, 138) 32064
layer 27: DimshuffleLayer (None, 138, 19) 32064
layer 28: Conv1DLayer (None, 16, 19) 38688
layer 29: BatchNormLayer (None, 16, 19) 38752
layer 30: NonlinearityLayer (None, 16, 19) 38752
layer 31: Conv1DLayer (None, 16, 19) 43104
layer 32: BatchNormLayer (None, 16, 19) 43168
layer 33: NonlinearityLayer (None, 16, 19) 43168
layer 34: Conv1DLayer (None, 16, 19) 47520
layer 35: BatchNormLayer (None, 16, 19) 47584
layer 36: NonlinearityLayer (None, 16, 19) 47584
layer 37: ConcatLayer (None, 48, 19) 65376
layer 38: DimshuffleLayer (None, 19, 48) 65376
layer 39: ConcatLayer (None, 19, 186) 65376
layer 40: ReshapeLayer (64, 3534) 65376
layer 41: DenseLayer (64, 200) 772176
layer 42: BatchNormLayer (64, 200) 772976
layer 43: NonlinearityLayer (64, 200) 772976
layer 44: DenseLayer (64, 8) 774584
layer 45: NonlinearityLayer (64, 8) 774584
Model output shape (64, 8)
Conv output shape (64, 16, 19)
Gradients out shape (64, 64, 19, 42)
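As an aside on that last shape: theano.gradient.jacobian differentiates each of the 64 scalar outputs nn_output[i, 0] with respect to the whole input batch, which is why grads comes out as (64, 64, 19, 42) rather than (64, 19, 42). Since deterministic=True keeps BatchNorm from mixing samples, only the "diagonal" blocks over the two batch axes are non-zero, and I pull out the per-sample gradients like this (a sketch, assuming the batch size of 64 used above):

# Gradient of sample i's class-0 score w.r.t. sample i's own input;
# the cross-sample blocks of the Jacobian are zero for this network.
batch_size = grads.shape[0]
per_sample_grads = grads[np.arange(batch_size), np.arange(batch_size)]  # (64, 19, 42)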
However, I've been having a lot of trouble figuring out how to compute the gradient of the predicted class with respect to the output feature map of a selected intermediate convolutional layer. When I try the following line:
grads = theano.gradient.jacobian(nn_output[:,0], wrt=conv_output)
I get the following error:
---------------------------------------------------------------------------
DisconnectedInputError Traceback (most recent call last)
Cell In[46], line 36
34 conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
35 nn_output = nn.layers.get_output(l_out, sym_x, deterministic=True) #softmax output
---> 36 grads = theano.gradient.jacobian(nn_output[:,0], conv_output)
37 res = theano.function(inputs=[sym_x], outputs=[nn_output, conv_output, grads],allow_input_downcast=True)
38 input_data = np.random.random((64, seq_len, num_features))
File *\lib\site-packages\theano\gradient.py:1912, in jacobian(expression, wrt, consider_constant, disconnected_inputs)
1907 return rvals
1908 # Computing the gradients does not affect the random seeds on any random
1909 # generator used n expression (because during computing gradients we are
1910 # just backtracking over old values. (rp Jan 2012 - if anyone has a
1911 # counter example please show me)
-> 1912 jacobs, updates = theano.scan(inner_function,
1913 sequences=arange(expression.shape[0]),
1914 non_sequences=[expression] + wrt)
1915 assert not updates, \
1916 ("Scan has returned a list of updates. This should not "
1917 "happen! Report this to theano-users (also include the "
1918 "script that generated the error)")
1919 return format_as(using_list, using_tuple, jacobs)
File *\lib\site-packages\theano\scan_module\scan.py:774, in scan(fn, sequences, outputs_info, non_sequences, n_steps, truncate_gradient, go_backwards, mode, name, profile, allow_gc, strict, return_list)
768 dummy_args = [arg for arg in args
769 if (not isinstance(arg, SharedVariable) and
770 not isinstance(arg, tensor.Constant))]
771 # when we apply the lambda expression we get a mixture of update rules
772 # and outputs that needs to be separated
--> 774 condition, outputs, updates = scan_utils.get_updates_and_outputs(fn(*args))
775 if condition is not None:
776 as_while = True
File *\lib\site-packages\theano\gradient.py:1902, in jacobian.<locals>.inner_function(*args)
1900 rvals = []
1901 for inp in args[2:]:
-> 1902 rval = grad(expr[idx],
1903 inp,
1904 consider_constant=consider_constant,
1905 disconnected_inputs=disconnected_inputs)
1906 rvals.append(rval)
1907 return rvals
File *\lib\site-packages\theano\gradient.py:589, in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
586 for elem in wrt:
587 if elem not in var_to_app_to_idx and elem is not cost \
588 and elem not in grad_dict:
--> 589 handle_disconnected(elem)
590 grad_dict[elem] = disconnected_type()
592 cost_name = None
File *\lib\site-packages\theano\gradient.py:576, in grad.<locals>.handle_disconnected(var)
574 elif disconnected_inputs == 'raise':
575 message = utils.get_variable_trace_string(var)
--> 576 raise DisconnectedInputError(message)
577 else:
578 raise ValueError("Invalid value for keyword "
579 "'disconnected_inputs', valid values are "
580 "'ignore', 'warn' and 'raise'.")
DisconnectedInputError:
Backtrace when that variable is created:
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3203, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3382, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3442, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\Users\YN\AppData\Local\Temp\ipykernel_15448\4013410499.py", line 34, in
conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
File "*\lib\site-packages\lasagne\layers\helper.py", line 197, in get_output
all_outputs[layer] = layer.get_output_for(layer_inputs, **kwargs)
File "*\lib\site-packages\lasagne\layers\conv.py", line 352, in get_output_for
conved = self.convolve(input, **kwargs)
File "*\lib\site-packages\lasagne\layers\conv.py", line 511, in convolve
conved = self.convolution(input, self.W,
File "*\lib\site-packages\lasagne\theano_extensions\conv.py", line 75, in conv1d_mc0
return conved[:, :, 0, :] # drop the unused dimension
Am I missing something? Is there a way to get this gradient computation to work?
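For what it's worth, my current suspicion is that the two separate get_output calls build two independent computation graphs, so the conv_output variable is never actually an ancestor of nn_output, and Theano reports it as disconnected. The variant I'm considering is requesting both expressions in a single get_output call (Lasagne's get_output accepts a list of layers), so they share one graph. Roughly, as a sketch I haven't fully verified:

# Build both expressions in ONE call so they share a single graph
# and conv_output is an ancestor of nn_output.
conv_output, nn_output = nn.layers.get_output(
    [layer_name, l_out], sym_x, deterministic=True)
grads = theano.gradient.jacobian(nn_output[:, 0], wrt=conv_output)

Would that be the correct approach here?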