Static layers

`graphs_on_grids.layers.base` #

`GraphBase` #

Bases: GraphLayer

Standard GNN layer. Implements $\textbf{H}^{(t+1)} = \sigma ((A+I)H^{(t)}W^{(t)})$

Source code in graphs_on_grids/layers/base.py

class GraphBase(GraphLayer):
    r"""
    Standard GNN layer. Implements
    $$
        \textbf{H}^{(t+1)} = \sigma ((A+I)H^{(t)}W^{(t)})
    $$

    The node features are multiplied with the adjacency matrix stored in `self._A_tilde`, optionally divided
    by the row sums of that matrix (`aggregation_method="mean"`) and then passed through the node-feature MLP.
    When edge features are passed as a second input, the adjacency entries at the positions listed in
    `self.edges` are replaced by edge weights predicted by an edge MLP and averaged over the batch.
    """

    def __init__(
        self,
        adjacency_matrix: np.ndarray,
        embedding_size: int,
        hidden_units_node: list | tuple | None = None,
        hidden_units_edge: list | tuple | None = None,
        dropout_rate: int | float = 0,
        use_bias: bool = True,
        activation: str | None = None,
        aggregation_method: str = "sum",
        weight_initializer: str
        | keras.initializers.Initializer
        | None = "glorot_uniform",
        weight_regularizer: str | keras.regularizers.Regularizer | None = None,
        bias_initializer: str | keras.initializers.Initializer | None = "zeros",
    ):
        """
        :param adjacency_matrix: adjacency matrix of the graphs to be passed to the model
        :param embedding_size: the output dimensionality of the node feature vector
        :param hidden_units_node: list or tuple of neuron counts in the hidden layers used in the MLP for processing
        node features
        :param hidden_units_edge: list or tuple of neuron counts in the hidden layers used in the MLP for processing
        edge features
        :param dropout_rate: The dropout rate used after each dense layer in the node- or edge-MLPs
        :param use_bias: Whether to use bias in the hidden layers in the node- and edge-MLPs
        :param activation: Activation function to be used within the layer
        :param aggregation_method: Chooses the aggregation method for message passing. Either "sum" or "mean".
        :param weight_initializer: Weight initializer to be used within the layer
        :param weight_regularizer: Weight regularizer to be used within the layer
        :param bias_initializer: Bias initializer to be used within the layer
        :raises ValueError: if `aggregation_method` is neither "sum" nor "mean"
        """
        super().__init__(
            adjacency_matrix=adjacency_matrix,
            embedding_size=embedding_size,
            hidden_units_node=hidden_units_node,
            hidden_units_edge=hidden_units_edge,
            dropout_rate=dropout_rate,
            use_bias=use_bias,
            activation=activation,
            weight_initializer=weight_initializer,
            weight_regularizer=weight_regularizer,
            bias_initializer=bias_initializer,
        )
        if aggregation_method not in ("sum", "mean"):
            raise ValueError(
                f"Received invalid aggregation method={aggregation_method}. Valid options are 'sum' or 'mean'."
            )

        self.aggregation_method = aggregation_method

        # Row sums of the adjacency matrix; used as divisor for "mean" aggregation
        self._D = tf.reduce_sum(self._A_tilde, axis=1)

    def build(self, input_shape):
        """
        Creates the edge-feature MLP if the layer is built on two inputs.

        :param input_shape: shape of the layer input; a shape of length two is interpreted as the pair
        (node features, edge features)
        """
        # NOTE(review): a single un-batched rank-2 node tensor also has a shape of length two - presumably
        # node features are always batched; confirm against callers.
        if len(input_shape) == 2:
            self.edge_feature_MLP = self.create_edge_mlp()

    def call(self, inputs, *args, **kwargs):
        """
        Aggregates the node features over the graph and applies the node-feature MLP.

        :param inputs: either the node features alone or a list/tuple `(node_features, edge_features)`
        :return: the output of the node-feature MLP, passed through `self.activation` if one is set
        """
        # if edge features are present (same condition that makes `build` create the edge MLP)
        if isinstance(inputs, (list, tuple)):
            node_features, edge_features = inputs
            # NOTE(review): the weighted matrix is stored on the layer as before, but `self._D` is not
            # recomputed, so "mean" aggregation still divides by the row sums taken in `__init__`.
            self._A_tilde = self._edge_weighted_adjacency(node_features, edge_features)
        else:
            node_features = inputs

        masked_feature_matrix = tf.matmul(self._A_tilde, node_features)

        if self.aggregation_method == "mean":
            masked_feature_matrix = tf.divide(masked_feature_matrix, self._D[:, None])

        output = self.node_feature_MLP(masked_feature_matrix)

        if self.activation is not None:
            output = self.activation(output)

        return output

    def _edge_weighted_adjacency(self, node_features, edge_features):
        """Returns `self.adjacency_matrix` with batch-averaged edge-MLP weights written to `self.edges`."""
        # Gather the features of both end nodes of every edge and flatten them to one vector per edge
        gather = tf.gather(node_features, self.edges, axis=1)
        node_feature_shape = tf.shape(node_features)
        node_features_expanded = tf.reshape(
            gather,
            (
                node_feature_shape[0],
                tf.shape(self.edges)[0],
                2 * node_feature_shape[2],
            ),
        )
        node_node_edge_features = self.combine_node_edge_features(
            edge_features, node_features, node_features_expanded
        )

        edge_weights = self.edge_feature_MLP(node_node_edge_features)

        # Squeeze only the trailing axis: an axis-less squeeze would also drop a batch (or edge) axis of
        # size one and break the batch average below.
        # assumes the edge MLP emits exactly one value per edge - TODO confirm against create_edge_mlp
        edge_weights = tf.squeeze(edge_weights, axis=-1)

        # calculate mean edge weights over batch
        edge_weights_avg = tf.math.reduce_mean(edge_weights, axis=0)
        return tf.tensor_scatter_nd_update(
            self.adjacency_matrix, self.edges, edge_weights_avg
        )

    def get_config(self):
        """Returns the parent layer's config extended by `aggregation_method`."""
        config = super().get_config()
        config["aggregation_method"] = self.aggregation_method
        return config

`__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_edge=None, dropout_rate=0, use_bias=True, activation=None, aggregation_method='sum', weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros')` #

Parameters:

Name	Type	Description	Default
`adjacency_matrix`	`ndarray`	adjacency matrix of the graphs to be passed to the model	required
`embedding_size`	`int`	the output dimensionality of the node feature vector	required
`hidden_units_node`	`list \| tuple`	list or tuple of neuron counts in the hidden layers used in the MLP for processing node features	`None`
`hidden_units_edge`	`list \| tuple`	list or tuple of neuron counts in the hidden layers used in the MLP for processing edge features	`None`
`dropout_rate`	`int \| float`	The dropout rate used after each dense layer in the node- or edge-MLPs	`0`
`use_bias`	`bool`	Whether to use bias in the hidden layers in the node- and edge-MLPs	`True`
`activation`	`str \| None`	Activation function to be used within the layer	`None`
`aggregation_method`	`str`	Chooses the aggregation method for message passing. Either "sum" or "mean".	`'sum'`
`weight_initializer`	`str \| Initializer \| None`	Weight initializer to be used within the layer	`'glorot_uniform'`
`weight_regularizer`	`str \| Regularizer \| None`	Weight regularizer to be used within the layer	`None`
`bias_initializer`	`str \| Initializer \| None`	Bias initializer to be used within the layer	`'zeros'`

Source code in graphs_on_grids/layers/base.py

def __init__(
    self,
    adjacency_matrix: np.ndarray,
    embedding_size: int,
    hidden_units_node: list | tuple = None,
    hidden_units_edge: list | tuple = None,
    dropout_rate: int | float = 0,
    use_bias: bool = True,
    activation: str | None = None,
    aggregation_method: str = "sum",
    weight_initializer: str | keras.initializers.Initializer | None = "glorot_uniform",
    weight_regularizer: str | keras.regularizers.Regularizer | None = None,
    bias_initializer: str | keras.initializers.Initializer | None = "zeros",
):
    """
    Configures the layer and stores the aggregation method.

    :param adjacency_matrix: adjacency matrix of the graphs to be passed to the model
    :param embedding_size: the output dimensionality of the node feature vector
    :param hidden_units_node: neuron counts (list or tuple) of the hidden layers of the node-feature MLP
    :param hidden_units_edge: neuron counts (list or tuple) of the hidden layers of the edge-feature MLP
    :param dropout_rate: dropout rate used after each dense layer in the node- or edge-MLPs
    :param use_bias: whether the hidden layers of the node- and edge-MLPs use a bias
    :param activation: activation function to be used within the layer
    :param aggregation_method: aggregation method for message passing, either "sum" or "mean"
    :param weight_initializer: weight initializer to be used within the layer
    :param weight_regularizer: weight regularizer to be used within the layer
    :param bias_initializer: bias initializer to be used within the layer
    :raises ValueError: if `aggregation_method` is neither "sum" nor "mean"
    """
    # Everything except the aggregation method is handled by the parent layer
    parent_kwargs = {
        "adjacency_matrix": adjacency_matrix,
        "embedding_size": embedding_size,
        "hidden_units_node": hidden_units_node,
        "hidden_units_edge": hidden_units_edge,
        "dropout_rate": dropout_rate,
        "use_bias": use_bias,
        "activation": activation,
        "weight_initializer": weight_initializer,
        "weight_regularizer": weight_regularizer,
        "bias_initializer": bias_initializer,
    }
    super(GraphBase, self).__init__(**parent_kwargs)

    supported_methods = ("sum", "mean")
    if aggregation_method not in supported_methods:
        message = f"Received invalid aggregation method={aggregation_method}. Valid options are 'sum' or 'mean'."
        raise ValueError(message)
    self.aggregation_method = aggregation_method

    # Row sums of the adjacency matrix (one degree value per node)
    row_sums = tf.reduce_sum(self._A_tilde, axis=1)
    self._D = row_sums

`graphs_on_grids.layers.conv` #

`GraphConvolution` #

Bases: GraphLayer

Graph convolution layer as shown in the original paper

$\textbf{H}^{(t+1)} = \sigma \biggl( \tilde{D}^{-{1\over2}}\tilde{A}\tilde{D}^{-{1\over2}} H^{(t)}W^{(t)}\biggr)$ where $\tilde{A} = A + I$ is the adjacency matrix with added self-loops and $\tilde{D}$ is its degree matrix.

Source code in graphs_on_grids/layers/conv.py

class GraphConvolution(GraphLayer):
    r"""
    Graph convolution layer as shown in the [original paper](http://arxiv.org/abs/1609.02907)

    $$
        \textbf{H}^{(t+1)} = \sigma \biggl( \tilde{D}^{-{1\over2}}\tilde{A}\tilde{D}^{-{1\over2}} H^{(t)}W^{(t)}\biggr)
    $$ where \( \tilde{A} = A + I \) is the adjacency matrix with added self-loops
    and \( \tilde{D} \) is its degree matrix.
    """

    def __init__(
        self,
        adjacency_matrix: np.ndarray,
        embedding_size: int,
        hidden_units_node: list | tuple | None = None,
        hidden_units_edge: list | tuple | None = None,
        dropout_rate: int | float = 0,
        use_bias: bool = True,
        activation: str | None = None,
        weight_initializer: str
        | keras.initializers.Initializer
        | None = "glorot_uniform",
        weight_regularizer: str | keras.regularizers.Regularizer | None = None,
        bias_initializer: str | keras.initializers.Initializer | None = "zeros",
    ):
        """
        :param adjacency_matrix: adjacency matrix of the graphs to be passed to the model
        :param embedding_size: the output dimensionality of the node feature vector
        :param hidden_units_node: list or tuple of neuron counts in the hidden layers used in the MLP for processing
        node features
        :param hidden_units_edge: list or tuple of neuron counts in the hidden layers used in the MLP for processing
        edge features
        :param dropout_rate: The dropout rate used after each dense layer in the node- or edge-MLPs
        :param use_bias: Whether to use bias in the hidden layers in the node- and edge-MLPs
        :param activation: Activation function to be used within the layer
        :param weight_initializer: Weight initializer to be used within the layer
        :param weight_regularizer: Weight regularizer to be used within the layer
        :param bias_initializer: Bias initializer to be used within the layer
        """
        super().__init__(
            adjacency_matrix=adjacency_matrix,
            embedding_size=embedding_size,
            hidden_units_node=hidden_units_node,
            hidden_units_edge=hidden_units_edge,
            dropout_rate=dropout_rate,
            use_bias=use_bias,
            activation=activation,
            weight_initializer=weight_initializer,
            weight_regularizer=weight_regularizer,
            bias_initializer=bias_initializer,
        )

        # Degree of every node (row sums of the adjacency matrix)
        degree = tf.reduce_sum(self._A_tilde, axis=1)

        # Inverse square root of the degree matrix. The matrix is diagonal, so the element-wise reciprocal
        # square root replaces the dense sqrtm + inv; rows without any connection get 0 instead of
        # making the matrix singular.
        inv_sqrt_degree = tf.where(
            degree > 0, tf.math.rsqrt(degree), tf.zeros_like(degree)
        )
        self._D_mod = tf.linalg.diag(inv_sqrt_degree)
        self._A_hat = tf.matmul(tf.matmul(self._D_mod, self._A_tilde), self._D_mod)

    def build(self, input_shape):
        """
        Creates the edge-feature MLP if the layer is built on two inputs.

        :param input_shape: shape of the layer input; a shape of length two is interpreted as the pair
        (node features, edge features)
        """
        # NOTE(review): a single un-batched rank-2 node tensor also has a shape of length two - presumably
        # node features are always batched; confirm against callers.
        if len(input_shape) == 2:
            self.edge_feature_MLP = self.create_edge_mlp()

    def call(self, inputs, *args, **kwargs):
        """
        Propagates the node features over the normalized adjacency matrix and applies the node-feature MLP.

        :param inputs: either the node features alone or a list/tuple `(node_features, edge_features)`
        :return: the output of the node-feature MLP, passed through `self.activation` if one is set
        """
        # if edge features are present (same condition that makes `build` create the edge MLP)
        if isinstance(inputs, (list, tuple)):
            node_features, edge_features = inputs
            self._A_tilde = self._edge_weighted_adjacency(node_features, edge_features)
            # NOTE(review): `_D_mod` still holds the degrees computed in `__init__`; it is not recomputed
            # from the edge-weighted matrix.
            self._A_hat = tf.matmul(tf.matmul(self._D_mod, self._A_tilde), self._D_mod)
        else:
            node_features = inputs

        masked_feature_matrix = tf.matmul(self._A_hat, node_features)

        output = self.node_feature_MLP(masked_feature_matrix)

        if self.activation is not None:
            output = self.activation(output)

        return output

    def _edge_weighted_adjacency(self, node_features, edge_features):
        """Returns `self.adjacency_matrix` with batch-averaged edge-MLP weights written to `self.edges`."""
        # Gather the features of both end nodes of every edge and flatten them to one vector per edge
        gather = tf.gather(node_features, self.edges, axis=1)
        node_feature_shape = tf.shape(node_features)
        node_features_expanded = tf.reshape(
            gather,
            (
                node_feature_shape[0],
                tf.shape(self.edges)[0],
                2 * node_feature_shape[2],
            ),
        )
        node_node_edge_features = self.combine_node_edge_features(
            edge_features, node_features, node_features_expanded
        )

        edge_weights = self.edge_feature_MLP(node_node_edge_features)

        # Squeeze only the trailing axis: an axis-less squeeze would also drop a batch (or edge) axis of
        # size one and break the batch average below.
        # assumes the edge MLP emits exactly one value per edge - TODO confirm against create_edge_mlp
        edge_weights = tf.squeeze(edge_weights, axis=-1)

        # calculate mean edge weights over batch
        edge_weights_avg = tf.math.reduce_mean(edge_weights, axis=0)
        return tf.tensor_scatter_nd_update(
            self.adjacency_matrix, self.edges, edge_weights_avg
        )

`__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_edge=None, dropout_rate=0, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros')` #

Parameters:

Name	Type	Description	Default
`adjacency_matrix`	`ndarray`	adjacency matrix of the graphs to be passed to the model	required
`embedding_size`	`int`	the output dimensionality of the node feature vector	required
`hidden_units_node`	`list \| tuple`	list or tuple of neuron counts in the hidden layers used in the MLP for processing node features	`None`
`hidden_units_edge`	`list \| tuple`	list or tuple of neuron counts in the hidden layers used in the MLP for processing edge features	`None`
`dropout_rate`	`int \| float`	The dropout rate used after each dense layer in the node- or edge-MLPs	`0`
`use_bias`	`bool`	Whether to use bias in the hidden layers in the node- and edge-MLPs	`True`
`activation`	`str \| None`	Activation function to be used within the layer	`None`
`weight_initializer`	`str \| Initializer \| None`	Weight initializer to be used within the layer	`'glorot_uniform'`
`weight_regularizer`	`str \| Regularizer \| None`	Weight regularizer to be used within the layer	`None`
`bias_initializer`	`str \| Initializer \| None`	Bias initializer to be used within the layer	`'zeros'`

Source code in graphs_on_grids/layers/conv.py

def __init__(
    self,
    adjacency_matrix: np.ndarray,
    embedding_size: int,
    hidden_units_node: list | tuple | None = None,
    hidden_units_edge: list | tuple | None = None,
    dropout_rate: int | float = 0,
    use_bias: bool = True,
    activation: str | None = None,
    weight_initializer: str
    | keras.initializers.Initializer
    | None = "glorot_uniform",
    weight_regularizer: str | keras.regularizers.Regularizer | None = None,
    bias_initializer: str | keras.initializers.Initializer | None = "zeros",
):
    """
    :param adjacency_matrix: adjacency matrix of the graphs to be passed to the model
    :param embedding_size: the output dimensionality of the node feature vector
    :param hidden_units_node: list or tuple of neuron counts in the hidden layers used in the MLP for processing
    node features
    :param hidden_units_edge: list or tuple of neuron counts in the hidden layers used in the MLP for processing
    edge features
    :param dropout_rate: The dropout rate used after each dense layer in the node- or edge-MLPs
    :param use_bias: Whether to use bias in the hidden layers in the node- and edge-MLPs
    :param activation: Activation function to be used within the layer
    :param weight_initializer: Weight initializer to be used within the layer
    :param weight_regularizer: Weight regularizer to be used within the layer
    :param bias_initializer: Bias initializer to be used within the layer
    """
    super(GraphConvolution, self).__init__(
        adjacency_matrix=adjacency_matrix,
        embedding_size=embedding_size,
        hidden_units_node=hidden_units_node,
        hidden_units_edge=hidden_units_edge,
        dropout_rate=dropout_rate,
        use_bias=use_bias,
        activation=activation,
        weight_initializer=weight_initializer,
        weight_regularizer=weight_regularizer,
        bias_initializer=bias_initializer,
    )

    # Degree of every node (row sums of the adjacency matrix)
    degree = tf.reduce_sum(self._A_tilde, axis=1)

    # Inverse square root of the degree matrix. The matrix is diagonal, so the element-wise reciprocal
    # square root replaces the dense sqrtm + inv; rows without any connection get 0 instead of
    # making the matrix singular.
    inv_sqrt_degree = tf.where(
        degree > 0, tf.math.rsqrt(degree), tf.zeros_like(degree)
    )
    self._D_mod = tf.linalg.diag(inv_sqrt_degree)
    self._A_hat = tf.matmul(tf.matmul(self._D_mod, self._A_tilde), self._D_mod)

`graphs_on_grids.layers.attention` #

`GraphAttention` #

Bases: GraphLayer

Graph attention layer as shown in the original paper

$\textbf{H}^{(t+1)} = \sigma \biggl( \tilde{A_\alpha} H^{(t)}W^{(t)}\biggr)$ where $\tilde{A_\alpha}$ is the adjacency matrix weighted by the attention scores $\alpha$ and $\alpha$ is computed by: $\mathbf{\alpha}_{ij} =\frac{ \exp\left(\mathrm{LeakyReLU}\left( a^{\top} [(XW)_i \, \| \, (XW)_j]\right)\right)}{\sum\limits_{k \in \mathcal{N}(i) \cup \{ i \}} \exp\left(\mathrm{LeakyReLU}\left( a^{\top} [(XW)_i \, \| \, (XW)_k]\right)\right)}$ for each node pair $(i,j)$ where $a \in \mathbb{R}^{2F'}$ is a trainable attention kernel.

Source code in graphs_on_grids/layers/attention.py

class GraphAttention(GraphLayer):
    r"""
    Graph attention layer as shown in the [original paper](https://arxiv.org/pdf/1710.10903.pdf)

    $$
        \textbf{H}^{(t+1)} = \sigma \biggl( \tilde{A_\alpha} H^{(t)}W^{(t)}\biggr)
    $$ where \( \tilde{A_\alpha} \) is the adjacency matrix weighted by the attention scores \(\alpha\)
    and \(\alpha\) is computed by:
    $$
        \mathbf{\alpha}_{ij} =\frac{ \exp\left(\mathrm{LeakyReLU}\left(
        a^{\top} [(XW)_i \, \| \, (XW)_j]\right)\right)}{\sum\limits_{k
        \in \mathcal{N}(i) \cup \{ i \}} \exp\left(\mathrm{LeakyReLU}\left(
        a^{\top} [(XW)_i \, \| \, (XW)_k]\right)\right)}
    $$ for each node pair \((i,j)\) where \(a \in \mathbb{R}^{2F'}\) is a trainable attention kernel.
    """

    def __init__(
        self,
        adjacency_matrix: np.ndarray,
        embedding_size: int,
        hidden_units_node: list | tuple | None = None,
        hidden_units_attention: list | tuple | None = None,
        dropout_rate: int | float = 0,
        use_bias: bool = True,
        activation: str | None = None,
        weight_initializer: str
        | keras.initializers.Initializer
        | None = "glorot_uniform",
        weight_regularizer: str | keras.regularizers.Regularizer | None = None,
        bias_initializer: str | keras.initializers.Initializer | None = "zeros",
    ):
        """
        :param adjacency_matrix: adjacency matrix of the graphs to be passed to the model
        :param embedding_size: the output dimensionality of the node feature vector
        :param hidden_units_node: list or tuple of neuron counts in the hidden layers used in the MLP for processing
        node features
        :param hidden_units_attention: list or tuple of neuron counts in the hidden layers used in the MLP for
        computing attention scores
        :param dropout_rate: The dropout rate used after each dense layer in the node- or edge-MLPs
        :param use_bias: Whether to use bias in the hidden layers in the node- and edge-MLPs
        :param activation: Activation function to be used within the layer
        :param weight_initializer: Weight initializer to be used within the layer
        :param weight_regularizer: Weight regularizer to be used within the layer
        :param bias_initializer: Bias initializer to be used within the layer
        """
        # The attention hidden units are handed to the parent as its edge hidden units
        super().__init__(
            adjacency_matrix=adjacency_matrix,
            embedding_size=embedding_size,
            hidden_units_node=hidden_units_node,
            hidden_units_edge=hidden_units_attention,
            dropout_rate=dropout_rate,
            use_bias=use_bias,
            activation=activation,
            weight_initializer=weight_initializer,
            weight_regularizer=weight_regularizer,
            bias_initializer=bias_initializer,
        )

        self.edge_feature_indices = self.calculate_edge_feature_indices()

    def build(self, input_shape):
        """Creates the MLP that computes the attention scores."""
        self.attention_mlp = self.create_attention_mlp()

    def call(self, inputs, *args, **kwargs):
        """
        Transforms the node features and aggregates them over the attention-weighted adjacency matrix.

        :param inputs: either the node features alone or a list/tuple `(node_features, edge_features)`
        :return: the aggregated node states, passed through `self.activation` if one is set
        """
        # if edge features are present
        has_edge_features = isinstance(inputs, (list, tuple))
        if has_edge_features:
            node_features, edge_features = inputs
        else:
            edge_features = None
            node_features = inputs

        # Update node embeddings through MLP
        node_states_transformed = self.node_feature_MLP(node_features)

        # Compute pair-wise attention scores
        node_states_expanded = tf.gather(node_states_transformed, self.edges, axis=1)
        node_states_expanded = tf.reshape(
            node_states_expanded,
            (
                tf.shape(node_features)[0],
                tf.shape(self.edges)[0],
                2 * self.embedding_size,
            ),
        )

        if has_edge_features:
            node_states_expanded = self.combine_node_edge_features(
                edge_features, node_features, node_states_expanded
            )

        attention_scores = self.attention_mlp(node_states_expanded)
        attention_scores = tf.squeeze(attention_scores, -1)

        # Normalize attention scores: the scores are clipped before exponentiation and divided by the
        # summed (batch-averaged) scores of all edges that share the same first node index
        attention_scores = tf.math.exp(tf.clip_by_value(attention_scores, -2, 2))
        source_nodes = self.edges[:, 0]
        attention_scores_sum = tf.math.unsorted_segment_sum(
            data=tf.reduce_mean(attention_scores, axis=0),
            segment_ids=source_nodes,
            num_segments=tf.reduce_max(source_nodes) + 1,
        )
        # Look the sum up per edge. Unlike repeating each sum by its edge count, this does not depend on
        # the edges being sorted by their first node index.
        attention_scores_sum = tf.gather(attention_scores_sum, source_nodes)
        attention_scores_norm = attention_scores / attention_scores_sum

        # apply attention scores and aggregate
        # attention scores are averaged for the whole batch
        # write normalized attention scores to the positions listed in self.edges
        weighted_adj = tf.tensor_scatter_nd_update(
            self._A_tilde, self.edges, tf.reduce_mean(attention_scores_norm, axis=0)
        )
        output = tf.matmul(weighted_adj, node_states_transformed)

        if self.activation is not None:
            output = self.activation(output)

        return output

    def create_attention_mlp(self):
        """
        Builds the attention MLP: the hidden layers followed by a single-unit dense layer with
        LeakyReLU(0.2) activation.

        :return: a `keras.Sequential` named "sequential_attention_scores"
        """
        self.attention_mlp_layers = self.create_hidden_layers(
            self.hidden_units_edge, False
        )
        self.attention_dense_out = keras.layers.Dense(
            1, activation=keras.layers.LeakyReLU(0.2)
        )
        self.attention_mlp_layers.append(self.attention_dense_out)
        return keras.Sequential(
            self.attention_mlp_layers, name="sequential_attention_scores"
        )

`__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_attention=None, dropout_rate=0, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros')` #

Parameters:

Name	Type	Description	Default
`adjacency_matrix`	`ndarray`	adjacency matrix of the graphs to be passed to the model	required
`embedding_size`	`int`	the output dimensionality of the node feature vector	required
`hidden_units_node`	`list \| tuple`	list or tuple of neuron counts in the hidden layers used in the MLP for processing node features	`None`
`hidden_units_attention`	`list \| tuple`	list or tuple of neuron counts in the hidden layers used in the MLP for computing attention scores	`None`
`dropout_rate`	`int \| float`	The dropout rate used after each dense layer in the node- or edge-MLPs	`0`
`use_bias`	`bool`	Whether to use bias in the hidden layers in the node- and edge-MLPs	`True`
`activation`	`str \| None`	Activation function to be used within the layer	`None`
`weight_initializer`	`str \| Initializer \| None`	Weight initializer to be used within the layer	`'glorot_uniform'`
`weight_regularizer`	`str \| Regularizer \| None`	Weight regularizer to be used within the layer	`None`
`bias_initializer`	`str \| Initializer \| None`	Bias initializer to be used within the layer	`'zeros'`

Source code in graphs_on_grids/layers/attention.py

def __init__(
    self,
    adjacency_matrix: np.ndarray,
    embedding_size: int,
    hidden_units_node: list | tuple = None,
    hidden_units_attention: list | tuple = None,
    dropout_rate: int | float = 0,
    use_bias: bool = True,
    activation: str | None = None,
    weight_initializer: str | keras.initializers.Initializer | None = "glorot_uniform",
    weight_regularizer: str | keras.regularizers.Regularizer | None = None,
    bias_initializer: str | keras.initializers.Initializer | None = "zeros",
):
    """
    Configures the layer and pre-computes the edge feature indices.

    :param adjacency_matrix: adjacency matrix of the graphs to be passed to the model
    :param embedding_size: the output dimensionality of the node feature vector
    :param hidden_units_node: neuron counts (list or tuple) of the hidden layers of the node-feature MLP
    :param hidden_units_attention: neuron counts (list or tuple) of the hidden layers of the MLP that computes
    the attention scores
    :param dropout_rate: dropout rate used after each dense layer in the node- or edge-MLPs
    :param use_bias: whether the hidden layers of the node- and edge-MLPs use a bias
    :param activation: activation function to be used within the layer
    :param weight_initializer: weight initializer to be used within the layer
    :param weight_regularizer: weight regularizer to be used within the layer
    :param bias_initializer: bias initializer to be used within the layer
    """
    # The attention hidden units travel to the parent layer under its `hidden_units_edge` argument
    parent_kwargs = {
        "adjacency_matrix": adjacency_matrix,
        "embedding_size": embedding_size,
        "hidden_units_node": hidden_units_node,
        "hidden_units_edge": hidden_units_attention,
        "dropout_rate": dropout_rate,
        "use_bias": use_bias,
        "activation": activation,
        "weight_initializer": weight_initializer,
        "weight_regularizer": weight_regularizer,
        "bias_initializer": bias_initializer,
    }
    super(GraphAttention, self).__init__(**parent_kwargs)

    edge_indices = self.calculate_edge_feature_indices()
    self.edge_feature_indices = edge_indices

`MultiHeadGraphAttention` #

Bases: GraphLayer

Multi-head graph attention layer as shown in the original paper

Computes num_heads independent graph attention layers and combines them by concatenation or averaging depending on the concat_heads parameter.

Source code in graphs_on_grids/layers/attention.py

class MultiHeadGraphAttention(GraphLayer):
    r"""
    Multi-head graph attention layer as shown in the [original paper](https://arxiv.org/pdf/1710.10903.pdf)

    Runs `num_heads` independent `GraphAttention` layers on the same input and merges their results, either
    by concatenating them along the last axis or by averaging them, as selected by `concat_heads`.
    """

    def __init__(
        self,
        adjacency_matrix: np.ndarray,
        embedding_size: int,
        hidden_units_node: list | tuple = None,
        hidden_units_attention: list | tuple = None,
        dropout_rate: int | float = 0,
        num_heads: int = 3,
        use_bias: bool = True,
        activation: str | None = None,
        weight_initializer: str | keras.initializers.Initializer | None = "glorot_uniform",
        weight_regularizer: str | keras.regularizers.Regularizer | None = None,
        bias_initializer: str | keras.initializers.Initializer | None = "zeros",
        concat_heads: bool = True,
    ):
        """
        :param adjacency_matrix: adjacency matrix of the graphs to be passed to the model
        :param embedding_size: the output dimensionality of the node feature vector
        :param hidden_units_node: neuron counts (list or tuple) of the hidden layers of the node-feature MLP
        :param hidden_units_attention: neuron counts (list or tuple) of the hidden layers of the MLP that
        computes the attention scores
        :param dropout_rate: dropout rate used after each dense layer in the node- or edge-MLPs
        :param num_heads: number of independent attention heads
        :param use_bias: whether the hidden layers of the node- and edge-MLPs use a bias
        :param activation: activation function to be used within the layer
        :param weight_initializer: weight initializer to be used within the layer
        :param weight_regularizer: weight regularizer to be used within the layer
        :param bias_initializer: bias initializer to be used within the layer
        :param concat_heads: concatenate (True) or average (False) the results of the attention heads
        """
        # Arguments that the parent layer and every attention head receive unchanged
        shared_kwargs = {
            "adjacency_matrix": adjacency_matrix,
            "embedding_size": embedding_size,
            "hidden_units_node": hidden_units_node,
            "dropout_rate": dropout_rate,
            "use_bias": use_bias,
            "activation": activation,
            "weight_initializer": weight_initializer,
            "weight_regularizer": weight_regularizer,
            "bias_initializer": bias_initializer,
        }
        super().__init__(hidden_units_edge=hidden_units_attention, **shared_kwargs)

        self.concat_heads = concat_heads
        self.num_heads = num_heads
        # NOTE(review): the same activation is also handed to every head below, so it is applied once
        # inside each head and a second time on the combined result in `call` - confirm this is intended.
        self.activation = keras.activations.get(activation)
        # do not create weights for Sequential node feature MLP
        self.node_feature_MLP = None

        heads = []
        for _ in range(num_heads):
            heads.append(
                GraphAttention(
                    hidden_units_attention=hidden_units_attention, **shared_kwargs
                )
            )
        self.attention_layers = heads

    def call(self, inputs, *args, **kwargs):
        """
        Feeds `inputs` to every attention head and merges the head outputs.

        :param inputs: input that is passed unchanged to each `GraphAttention` head
        :return: the concatenated or averaged head outputs, passed through `self.activation` if one is set
        """
        # Obtain outputs from each attention head
        head_outputs = []
        for head in self.attention_layers:
            head_outputs.append(head(inputs))

        # Concatenate or average the node states from each head
        if self.concat_heads:
            merged = tf.concat(head_outputs, axis=-1)
        else:
            stacked = tf.stack(head_outputs, axis=-1)
            merged = tf.reduce_mean(stacked, axis=-1)

        if self.activation is None:
            return merged
        return self.activation(merged)

    def get_config(self):
        """Returns the parent layer's config extended by `num_heads` and `concat_heads`."""
        config = super().get_config()
        config.update(num_heads=self.num_heads, concat_heads=self.concat_heads)
        return config

`__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_attention=None, dropout_rate=0, num_heads=3, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros', concat_heads=True)` #

Parameters:

Name	Type	Description	Default
`adjacency_matrix`	`ndarray`	adjacency matrix of the graphs to be passed to the model	required
`embedding_size`	`int`	the output dimensionality of the node feature vector	required
`hidden_units_node`	`list \| tuple`	list or tuple of neuron counts in the hidden layers used in the MLP for processing node features	`None`
`hidden_units_attention`	`list \| tuple`	list or tuple of neuron counts in the hidden layers used in the MLP for computing attention scores	`None`
`dropout_rate`	`int \| float`	The dropout rate used after each dense layer in the node- or edge-MLPs	`0`
`num_heads`	`int`	Number of independent attention heads	`3`
`use_bias`	`bool`	Whether to use bias in the hidden layers in the node- and edge-MLPs	`True`
`activation`	`str \| None`	Activation function to be used within the layer	`None`
`weight_initializer`	`str \| Initializer \| None`	Weight initializer to be used within the layer	`'glorot_uniform'`
`weight_regularizer`	`str \| Regularizer \| None`	Weight regularizer to be used within the layer	`None`
`bias_initializer`	`str \| Initializer \| None`	Bias initializer to be used within the layer	`'zeros'`
`concat_heads`	`bool`	Whether to concatenate (True) results from the attention heads or average (False) them.	`True`

Source code in graphs_on_grids/layers/attention.py

def __init__(
    self,
    adjacency_matrix: np.ndarray,
    embedding_size: int,
    hidden_units_node: list | tuple = None,
    hidden_units_attention: list | tuple = None,
    dropout_rate: int | float = 0,
    num_heads: int = 3,
    use_bias: bool = True,
    activation: str | None = None,
    weight_initializer: str
    | keras.initializers.Initializer
    | None = "glorot_uniform",
    weight_regularizer: str | keras.regularizers.Regularizer | None = None,
    bias_initializer: str | keras.initializers.Initializer | None = "zeros",
    concat_heads: bool = True,
):
    """
    Set up the layer together with ``num_heads`` independent ``GraphAttention`` heads.

    :param adjacency_matrix: adjacency matrix of the graphs to be passed to the model
    :param embedding_size: the output dimensionality of the node feature vector
    :param hidden_units_node: list or tuple of neuron counts in the hidden layers of the MLP that processes
    node features
    :param hidden_units_attention: list or tuple of neuron counts in the hidden layers of the MLP that computes
    attention scores
    :param dropout_rate: the dropout rate used after each dense layer in the node- or edge-MLPs
    :param num_heads: number of independent attention heads
    :param use_bias: whether to use bias in the hidden layers in the node- and edge-MLPs
    :param activation: activation function to be used within the layer
    :param weight_initializer: weight initializer to be used within the layer
    :param weight_regularizer: weight regularizer to be used within the layer
    :param bias_initializer: bias initializer to be used within the layer
    :param concat_heads: whether to concatenate (True) results from the attention heads or average (False) them
    """
    # Settings that the parent layer and every attention head receive unchanged.
    shared_settings = {
        "adjacency_matrix": adjacency_matrix,
        "hidden_units_node": hidden_units_node,
        "embedding_size": embedding_size,
        "dropout_rate": dropout_rate,
        "use_bias": use_bias,
        "activation": activation,
        "weight_initializer": weight_initializer,
        "weight_regularizer": weight_regularizer,
        "bias_initializer": bias_initializer,
    }

    # The parent constructor takes the attention MLP sizes under its ``hidden_units_edge`` argument.
    super(MultiHeadGraphAttention, self).__init__(
        hidden_units_edge=hidden_units_attention,
        **shared_settings,
    )

    self.concat_heads = concat_heads
    self.num_heads = num_heads
    self.activation = keras.activations.get(activation)
    # do not create weights for Sequential node feature MLP
    self.node_feature_MLP = None

    # One separately constructed GraphAttention layer per head.
    heads = []
    for _ in range(num_heads):
        heads.append(
            GraphAttention(
                hidden_units_attention=hidden_units_attention,
                **shared_settings,
            )
        )
    self.attention_layers = heads

`graphs_on_grids.layers.output_layer` #

`FlattenedDenseOutput` #

Bases: Layer

A utility output layer that takes in a 2D feature matrix and flattens it before passing it through a regular dense layer. The output feature matrix is reshaped to be 2D again.

Source code in graphs_on_grids/layers/output_layer.py

class FlattenedDenseOutput(keras.layers.Layer):
    """
    A utility output layer that takes in a 2D feature matrix and flattens it before passing it through a regular
    dense layer. The output feature matrix is reshaped to be 2D again.
    """

    def __init__(self, units: int, activation: str | None = None, **kwargs) -> None:
        """

        :param units: dimensionality of the output node feature vector
        :param activation: activation function used in dense layer
        :param kwargs: additional keyword arguments forwarded to the base ``Layer`` constructor. Accepting them
        lets the dictionary returned by ``get_config`` (which also holds the base layer's own entries) be
        passed back to this constructor when the layer is re-created.
        """
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, input_shape):
        """
        Create the dense layer once the number of nodes is known.

        :param input_shape: three-element shape, unpacked as (batch size, number of nodes, embedding size)
        """
        # Only the node count is needed: the dense layer emits `units` values per node.
        _, num_nodes, _ = input_shape
        self.dense_flat = keras.layers.Dense(
            num_nodes * self.units, activation=self.activation
        )

    def call(self, inputs, *args, **kwargs):
        """
        Flatten the per-node features, apply the dense layer and restore the per-node layout.

        :param inputs: tensor whose static shape unpacks as (batch size, number of nodes, embedding size)
        :return: tensor reshaped to (-1, number of nodes, units)
        """
        _, num_nodes, embedding_size = inputs.get_shape().as_list()
        # Merge node and feature axes so a single dense layer sees all node features of a sample at once.
        flattened = tf.reshape(inputs, (-1, num_nodes * embedding_size))
        flat_dense_output = self.dense_flat(flattened)
        output_reshaped = tf.reshape(flat_dense_output, (-1, num_nodes, self.units))
        return output_reshaped

    def get_config(self):
        """
        Return the base layer configuration extended by ``units`` and the serialized ``activation``.

        :return: configuration dictionary of the layer
        """
        config = super().get_config()
        config.update(
            {
                "units": self.units,
                "activation": keras.activations.serialize(self.activation),
            }
        )
        return config

`__init__(units, activation=None)` #

Parameters:

Name	Type	Description	Default
`units`	`int`	dimensionality of the output node feature vector	required
`activation`	`str`	activation function used in dense layer	`None`

Source code in graphs_on_grids/layers/output_layer.py

def __init__(self, units: int, activation: str | None = None, **kwargs) -> None:
    """

    :param units: dimensionality of the output node feature vector
    :param activation: activation function used in dense layer
    :param kwargs: additional keyword arguments forwarded to the base ``Layer`` constructor, so that a
    configuration dictionary containing the base layer's own entries can be passed back to this constructor
    """
    super().__init__(**kwargs)
    self.units = units
    self.activation = keras.activations.get(activation)

Static layers

graphs_on_grids.layers.base #

GraphBase #

__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_edge=None, dropout_rate=0, use_bias=True, activation=None, aggregation_method='sum', weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros') #

graphs_on_grids.layers.conv #

GraphConvolution #

__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_edge=None, dropout_rate=0, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros') #

graphs_on_grids.layers.attention #

GraphAttention #

__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_attention=None, dropout_rate=0, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros') #

MultiHeadGraphAttention #

__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_attention=None, dropout_rate=0, num_heads=3, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros', concat_heads=True) #

graphs_on_grids.layers.output_layer #

FlattenedDenseOutput #

__init__(units, activation=None) #

`graphs_on_grids.layers.base` #

`GraphBase` #

`__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_edge=None, dropout_rate=0, use_bias=True, activation=None, aggregation_method='sum', weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros')` #

`graphs_on_grids.layers.conv` #

`GraphConvolution` #

`__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_edge=None, dropout_rate=0, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros')` #

`graphs_on_grids.layers.attention` #

`GraphAttention` #

`__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_attention=None, dropout_rate=0, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros')` #

`MultiHeadGraphAttention` #

`__init__(adjacency_matrix, embedding_size, hidden_units_node=None, hidden_units_attention=None, dropout_rate=0, num_heads=3, use_bias=True, activation=None, weight_initializer='glorot_uniform', weight_regularizer=None, bias_initializer='zeros', concat_heads=True)` #

`graphs_on_grids.layers.output_layer` #

`FlattenedDenseOutput` #

`__init__(units, activation=None)` #