I'm able to compute the gradient of the predicted class (class 0) with respect to the input samples in the mini-batch. Here are the relevant parts of my code:
import theano.tensor as T
import numpy as np
import lasagne as nn
import importlib
import theano
from global_vars import *
theano.config.floatX = 'float32'
seq_len = 19
num_features = 42
config_name = 'pureConv'
config_initialize(config_name)
metadata_path = "metadata/pureConv/dump_pureConv-20230429-160908-223.pkl"
metadata = np.load(metadata_path, allow_pickle=True)
config = importlib.import_module("configurations.%s" % config_name)
params = np.array(metadata['param_values'])
l_in, l_out = config.build_model()
nn.layers.set_all_param_values(l_out, metadata['param_values'])
all_layers = nn.layers.get_all_layers(l_out)
i = 0
for layer in all_layers:
    #name = string.ljust(layer.__class__.__name__, 32)
    name = layer.__class__.__name__
    print(" layer %d: %s %s %s" % (i, name, nn.layers.get_output_shape(layer), nn.layers.count_params(layer)))
    i += 1
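# layer 34 is the last Conv1DLayer in the stack (see the layer printout below)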
layer_name = all_layers[34]
sym_x = T.tensor3()
conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
nn_output = nn.layers.get_output(l_out, sym_x, deterministic=True) #softmax output
grads = theano.gradient.jacobian(nn_output[:,0], wrt=sym_x)
res = theano.function(inputs=[sym_x], outputs=[nn_output, conv_output, grads], allow_input_downcast=True)
input_data = np.random.random((64, seq_len, num_features))
out, conv, grads = res(input_data)
print("Model output shape", out.shape)
print("Conv output shape",conv.shape)
print("Gradients out shape", grads.shape)
which prints the following:
layer 0: InputLayer (None, 19, 42) 0
layer 1: DimshuffleLayer (None, 42, 19) 0
layer 2: Conv1DLayer (None, 16, 19) 2016
layer 3: BatchNormLayer (None, 16, 19) 2080
layer 4: NonlinearityLayer (None, 16, 19) 2080
layer 5: Conv1DLayer (None, 16, 19) 3360
layer 6: BatchNormLayer (None, 16, 19) 3424
layer 7: NonlinearityLayer (None, 16, 19) 3424
layer 8: Conv1DLayer (None, 16, 19) 4704
layer 9: BatchNormLayer (None, 16, 19) 4768
layer 10: NonlinearityLayer (None, 16, 19) 4768
layer 11: ConcatLayer (None, 48, 19) 10272
layer 12: DimshuffleLayer (None, 19, 48) 10272
layer 13: ConcatLayer (None, 19, 90) 10272
layer 14: DimshuffleLayer (None, 90, 19) 10272
layer 15: Conv1DLayer (None, 16, 19) 14592
layer 16: BatchNormLayer (None, 16, 19) 14656
layer 17: NonlinearityLayer (None, 16, 19) 14656
layer 18: Conv1DLayer (None, 16, 19) 17472
layer 19: BatchNormLayer (None, 16, 19) 17536
layer 20: NonlinearityLayer (None, 16, 19) 17536
layer 21: Conv1DLayer (None, 16, 19) 20352
layer 22: BatchNormLayer (None, 16, 19) 20416
layer 23: NonlinearityLayer (None, 16, 19) 20416
layer 24: ConcatLayer (None, 48, 19) 32064
layer 25: DimshuffleLayer (None, 19, 48) 32064
layer 26: ConcatLayer (None, 19, 138) 32064
layer 27: DimshuffleLayer (None, 138, 19) 32064
layer 28: Conv1DLayer (None, 16, 19) 38688
layer 29: BatchNormLayer (None, 16, 19) 38752
layer 30: NonlinearityLayer (None, 16, 19) 38752
layer 31: Conv1DLayer (None, 16, 19) 43104
layer 32: BatchNormLayer (None, 16, 19) 43168
layer 33: NonlinearityLayer (None, 16, 19) 43168
layer 34: Conv1DLayer (None, 16, 19) 47520
layer 35: BatchNormLayer (None, 16, 19) 47584
layer 36: NonlinearityLayer (None, 16, 19) 47584
layer 37: ConcatLayer (None, 48, 19) 65376
layer 38: DimshuffleLayer (None, 19, 48) 65376
layer 39: ConcatLayer (None, 19, 186) 65376
layer 40: ReshapeLayer (64, 3534) 65376
layer 41: DenseLayer (64, 200) 772176
layer 42: BatchNormLayer (64, 200) 772976
layer 43: NonlinearityLayer (64, 200) 772976
layer 44: DenseLayer (64, 8) 774584
layer 45: NonlinearityLayer (64, 8) 774584
Model output shape (64, 8)
Conv output shape (64, 16, 19)
Gradients out shape (64, 64, 19, 42)
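A note on the Jacobian shape, in case it matters: theano.gradient.jacobian differentiates each of the 64 class-0 scores with respect to the whole (64, 19, 42) input batch, which is why grads comes out as (64, 64, 19, 42). Since deterministic=True makes the batch-norm layers use their stored statistics, sample i's score should (as far as I understand) depend only on sample i's input, so the per-sample gradients sit on the diagonal. A rough sketch of extracting them, assuming no cross-sample dependence in the deterministic pass:

# Rough sketch: pick grads[i, i] for each sample i in the batch
idx = np.arange(grads.shape[0])
per_sample_grads = grads[idx, idx]   # expected shape (64, 19, 42)
print("Per-sample gradients shape", per_sample_grads.shape)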
However, I've been having a lot of trouble figuring out how to compute the gradient of the predicted class with respect to the output feature map of a selected intermediate convolutional layer. When I try the following line:
grads = theano.gradient.jacobian(nn_output[:,0], wrt=conv_output)
it raises the following error:
---------------------------------------------------------------------------
DisconnectedInputError Traceback (most recent call last)
Cell In[46], line 36
34 conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
35 nn_output = nn.layers.get_output(l_out, sym_x, deterministic=True) #softmax output
---> 36 grads = theano.gradient.jacobian(nn_output[:,0], conv_output)
37 res = theano.function(inputs=[sym_x], outputs=[nn_output, conv_output, grads],allow_input_downcast=True)
38 input_data = np.random.random((64, seq_len, num_features))
File *\lib\site-packages\theano\gradient.py:1912, in jacobian(expression, wrt, consider_constant, disconnected_inputs)
1907 return rvals
1908 # Computing the gradients does not affect the random seeds on any random
1909 # generator used n expression (because during computing gradients we are
1910 # just backtracking over old values. (rp Jan 2012 - if anyone has a
1911 # counter example please show me)
-> 1912 jacobs, updates = theano.scan(inner_function,
1913 sequences=arange(expression.shape[0]),
1914 non_sequences=[expression] + wrt)
1915 assert not updates, \
1916 ("Scan has returned a list of updates. This should not "
1917 "happen! Report this to theano-users (also include the "
1918 "script that generated the error)")
1919 return format_as(using_list, using_tuple, jacobs)
File *\lib\site-packages\theano\scan_module\scan.py:774, in scan(fn, sequences, outputs_info, non_sequences, n_steps, truncate_gradient, go_backwards, mode, name, profile, allow_gc, strict, return_list)
768 dummy_args = [arg for arg in args
769 if (not isinstance(arg, SharedVariable) and
770 not isinstance(arg, tensor.Constant))]
771 # when we apply the lambda expression we get a mixture of update rules
772 # and outputs that needs to be separated
--> 774 condition, outputs, updates = scan_utils.get_updates_and_outputs(fn(*args))
775 if condition is not None:
776 as_while = True
File *\lib\site-packages\theano\gradient.py:1902, in jacobian.<locals>.inner_function(*args)
1900 rvals = []
1901 for inp in args[2:]:
-> 1902 rval = grad(expr[idx],
1903 inp,
1904 consider_constant=consider_constant,
1905 disconnected_inputs=disconnected_inputs)
1906 rvals.append(rval)
1907 return rvals
File *\lib\site-packages\theano\gradient.py:589, in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
586 for elem in wrt:
587 if elem not in var_to_app_to_idx and elem is not cost \
588 and elem not in grad_dict:
--> 589 handle_disconnected(elem)
590 grad_dict[elem] = disconnected_type()
592 cost_name = None
File *\lib\site-packages\theano\gradient.py:576, in grad.<locals>.handle_disconnected(var)
574 elif disconnected_inputs == 'raise':
575 message = utils.get_variable_trace_string(var)
--> 576 raise DisconnectedInputError(message)
577 else:
578 raise ValueError("Invalid value for keyword "
579 "'disconnected_inputs', valid values are "
580 "'ignore', 'warn' and 'raise'.")
DisconnectedInputError:
Backtrace when that variable is created:
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3203, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3382, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "*\lib\site-packages\IPython\core\interactiveshell.py", line 3442, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\Users\YN\AppData\Local\Temp\ipykernel_15448\4013410499.py", line 34, in
conv_output = nn.layers.get_output(layer_name, sym_x, deterministic=True) #Conv1DLayer
File "*\lib\site-packages\lasagne\layers\helper.py", line 197, in get_output
all_outputs[layer] = layer.get_output_for(layer_inputs, **kwargs)
File "*\lib\site-packages\lasagne\layers\conv.py", line 352, in get_output_for
conved = self.convolve(input, **kwargs)
File "*\lib\site-packages\lasagne\layers\conv.py", line 511, in convolve
conved = self.convolution(input, self.W,
File "*\lib\site-packages\lasagne\theano_extensions\conv.py", line 75, in conv1d_mc0
return conved[:, :, 0, :] # drop the unused dimension
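For reference, I don't think this is specific to my model. The stripped-down sketch below (a toy network with made-up layer sizes, but the same two get_output calls and the same jacobian call) runs into the same DisconnectedInputError as far as I can tell:

import theano
import theano.tensor as T
import lasagne as nn

# Toy stand-in for the real model: layer sizes here are made up,
# only the two get_output() calls and the jacobian() call match the code above.
sym_x = T.tensor3()
l_in = nn.layers.InputLayer((None, 42, 19), input_var=sym_x)
l_conv = nn.layers.Conv1DLayer(l_in, num_filters=16, filter_size=3, pad='same')
l_out = nn.layers.DenseLayer(l_conv, num_units=8,
                             nonlinearity=nn.nonlinearities.softmax)

conv_output = nn.layers.get_output(l_conv, sym_x, deterministic=True)
nn_output = nn.layers.get_output(l_out, sym_x, deterministic=True)

# This line raises DisconnectedInputError, just like in the full model.
grads = theano.gradient.jacobian(nn_output[:, 0], wrt=conv_output)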
Am I missing something? Is there is a way to get this gradient computation to work?
Thanks