# Source code for KGE.models.translating_based.TransR

"""An implementation of TransR
"""

import logging
import numpy as np
import tensorflow as tf
from ..base_model.TranslatingModel import TranslatingModel
from ...score import LpDistancePow
from ...loss import PairwiseHingeLoss
from ...ns_strategy import UniformStrategy
from ...constraint import clip_constraint

# Importing this module sets the root logger's level to INFO.
logging.root.setLevel(logging.INFO)

class TransR(TranslatingModel):
    """An implementation of TransR from `[lin 2015] <https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/viewFile/9571/9523/>`_.

    Both TransE and TransH assume embeddings of entities and relations are in the same embedding space :math:`\\mathbb{R}^k`.
    But relations and entities are completely different objects, so it may not be possible to represent them in the same
    semantic space. To address this issue, TransR models entities and relations in distinct embedding spaces, i.e., entity
    space and relation spaces. \n
    TransR represents each entity as :math:`\\textbf{e}_i \\in \\mathbb{R}^k` and each relation as
    :math:`\\textbf{r}_i \\in \\mathbb{R}^d`; the dimensions of entity embeddings and relation embeddings are not necessarily
    identical. For each relation, TransR sets a projection matrix :math:`\\textbf{M}_i \\in \\mathbb{R}^{k \\times d}`, which
    projects entities from entity space to relation space, expecting the projected entity embeddings can be connected by
    the relation embeddings in the relation spaces:

    .. math::
        {\\textbf{e}_h}_{\\perp} + \\textbf{r}_r \\approx {\\textbf{e}_t}_{\\perp}

        {\\textbf{e}_h}_{\\perp} = \\textbf{e}_h \\textbf{M}_r

        {\\textbf{e}_t}_{\\perp} = \\textbf{e}_t \\textbf{M}_r

    where :math:`\\textbf{e}_i \\in \\mathbb{R}^k` are vector representations of the entities,
    :math:`\\textbf{r}_i \\in \\mathbb{R}^d` are vector representations of the relations,
    and :math:`\\textbf{M}_i \\in \\mathbb{R}^{k \\times d}` are the relation projection matrices.

    The score of :math:`(h,r,t)` is:

    .. math::
        f(h,r,t) = s({\\textbf{e}_h}_{\\perp} + \\textbf{r}_r, {\\textbf{e}_t}_{\\perp})

    where :math:`s` is a scoring function (:py:mod:`KGE.score`) that scores the plausibility of matching between
    :math:`(translation, predicate)`. \n
    By default, using :py:mod:`KGE.score.LpDistancePow`, negative squared L2-distance:

    .. math::
        s({\\textbf{e}_h}_{\\perp} + \\textbf{r}_r, {\\textbf{e}_t}_{\\perp}) =
            - \\left\\| {\\textbf{e}_h}_{\\perp} + \\textbf{r}_r - {\\textbf{e}_t}_{\\perp} \\right\\|_2^2

    You can change to L1-distance by giving :code:`score_fn=LpDistancePow(p=1)` in :py:func:`__init__`,
    or change any score function you like by specifying :code:`score_fn` in :py:func:`__init__`.

    If :code:`constraint=True` given in :py:func:`__init__`, conduct following constraints: \n
    1. :math:`\\left\\| \\textbf{e}_h \\right\\|_2 \\leq 1`  and :math:`\\left\\| \\textbf{r}_r \\right\\|_2 \\leq 1` and :math:`\\left\\| \\textbf{e}_t \\right\\|_2 \\leq 1` \n
    2. :math:`\\left\\| \\textbf{e}_h \\textbf{M}_r \\right\\|_2 \\leq 1` and  :math:`\\left\\| \\textbf{e}_t \\textbf{M}_r \\right\\|_2 \\leq 1` \n

    Since the `original TransR paper <https://ojs.aaai.org/index.php/AAAI/article/view/8870>`_ does not specify how
    these constraints are conducted, here we use :py:func:`KGE.constraint.clip_constraint`, which keeps the tensor's
    norm from exceeding a threshold: if the norm exceeds it, the tensor norm is clipped to the given threshold value.
    """

    def __init__(self, embedding_params, negative_ratio, corrupt_side,
                 score_fn=LpDistancePow(p=2), loss_fn=PairwiseHingeLoss(margin=1),
                 ns_strategy=UniformStrategy, constraint=True, n_workers=1):
        """Initialize TransR.

        Parameters
        ----------
        embedding_params : dict
            embedding dimension parameters, should have following keys: \n
            :code:`'ent_embedding_size'` for entity embedding dimension :math:`k`
            :code:`'rel_embedding_size'` for relation embedding dimension :math:`d`
        negative_ratio : int
            number of negative sample
        corrupt_side : str
            corrupt from which side while training, can be :code:`'h'`, :code:`'t'`, or :code:`'h+t'`
        score_fn : function, optional
            scoring function, by default :py:mod:`KGE.score.LpDistancePow` with :code:`p=2`
        loss_fn : class, optional
            loss function class :py:mod:`KGE.loss.Loss`, by default :py:mod:`KGE.loss.PairwiseHingeLoss`
            with :code:`margin=1`
        ns_strategy : function, optional
            negative sampling strategy, by default :py:mod:`KGE.ns_strategy.UniformStrategy`
        constraint : bool, optional
            conduct constraint or not, by default True
        n_workers : int, optional
            number of workers for negative sampling, by default 1
        """

        super().__init__(embedding_params, negative_ratio, corrupt_side,
                         score_fn, loss_fn, ns_strategy, n_workers)
        self.constraint = constraint

    def _init_embeddings(self, seed):
        """Initialize the TransR embeddings.

        If :code:`self._model_weights_initial` is :code:`None`, the entity and relation
        embeddings are drawn uniformly from :code:`[-sqrt(6 / dim), sqrt(6 / dim)]` and every
        relation projection matrix starts as a (rectangular) identity matrix. Otherwise the
        supplied weights are validated with :py:func:`_check_model_weights` and used as they are.

        Parameters
        ----------
        seed : int
            random seed
        """

        if self._model_weights_initial is not None:
            self._check_model_weights(self._model_weights_initial)
            self.model_weights = self._model_weights_initial
            return

        ent_dim = self.embedding_params.get("ent_embedding_size")
        rel_dim = self.embedding_params.get("rel_embedding_size")
        assert ent_dim is not None, "'ent_embedding_size' should be given in embedding_params when using TransR"
        assert rel_dim is not None, "'rel_embedding_size' should be given in embedding_params when using TransR"

        n_ent = len(self.metadata["ind2ent"])
        n_rel = len(self.metadata["ind2rel"])

        def uniform_table(n_rows, dim):
            # The sampling bound depends only on the embedding dimension.
            limit = np.sqrt(6.0 / dim)
            initializer = tf.initializers.RandomUniform(minval=-limit, maxval=limit, seed=seed)
            return initializer([n_rows, dim])

        ent_emb = tf.Variable(uniform_table(n_ent, ent_dim), name="entities_embedding", dtype=np.float32)
        rel_emb = tf.Variable(uniform_table(n_rel, rel_dim), name="relations_embedding", dtype=np.float32)

        # One (ent_dim x rel_dim) identity-like projection matrix per relation.
        rel_proj = tf.Variable(
            tf.eye(num_rows=ent_dim, num_columns=rel_dim, batch_shape=[n_rel]),
            name="relations_projector", dtype=np.float32
        )

        self.model_weights = {"ent_emb": ent_emb, "rel_emb": rel_emb, "rel_proj": rel_proj}

    def _check_model_weights(self, model_weights):
        """Check the model_weights have necessary keys and dimensions

        Parameters
        ----------
        model_weights : dict
            model weights to check, must contain :code:`'ent_emb'`, :code:`'rel_emb'`
            and :code:`'rel_proj'`.

        Raises
        ------
        AssertionError
            if a key is missing or a weight does not have the expected shape.
        """

        assert model_weights.get("ent_emb") is not None, "entity embedding should be given in model_weights with key 'ent_emb'"
        assert model_weights.get("rel_emb") is not None, "relation embedding should be given in model_weights with key 'rel_emb'"
        assert model_weights.get("rel_proj") is not None, "relation projection matrix should be given in model_weights with key 'rel_proj'"

        n_ent = len(self.metadata["ind2ent"])
        n_rel = len(self.metadata["ind2rel"])
        ent_dim = self.embedding_params["ent_embedding_size"]
        rel_dim = self.embedding_params["rel_embedding_size"]

        assert list(model_weights["ent_emb"].shape) == [n_ent, ent_dim], \
            "shape of 'ent_emb' should be (len(metadata['ind2ent']), embedding_params['ent_embedding_size'])"
        assert list(model_weights["rel_emb"].shape) == [n_rel, rel_dim], \
            "shape of 'rel_emb' should be (len(metadata['ind2rel']), embedding_params['rel_embedding_size'])"
        assert list(model_weights["rel_proj"].shape) == [n_rel, ent_dim, rel_dim], \
            "shape of 'rel_proj' should be (len(metadata['ind2rel']), embedding_params['ent_embedding_size'], embedding_params['rel_embedding_size'])"

    def score_hrt(self, h, r, t):
        """ Score the triplets :math:`(h,r,t)`.

        If :code:`h` is :code:`None`, score all entities: :math:`(h_i, r, t)`. \n
        If :code:`t` is :code:`None`, score all entities: :math:`(h, r, t_i)`. \n
        :code:`h` and :code:`t` should not be :code:`None` simultaneously.
        The indices (including the :code:`None` cases) are resolved by the parent class'
        :code:`score_hrt` before any embedding is looked up.

        Parameters
        ----------
        h : tf.Tensor or np.ndarray or None
            index of heads with shape :code:`(n,)`
        r : tf.Tensor or np.ndarray
            index of relations with shape :code:`(n,)`
        t : tf.Tensor or np.ndarray or None
            index of tails with shape :code:`(n,)`

        Returns
        -------
        tf.Tensor
            triplets scores as returned by :code:`score_fn`, expected shape :code:`(n,)`
        """

        h, r, t = super().score_hrt(h, r, t)

        ent_table = self.model_weights["ent_emb"]
        # Trailing unit axis turns each embedding into a column vector for matmul.
        h_emb = tf.expand_dims(tf.nn.embedding_lookup(ent_table, h), axis=-1)
        t_emb = tf.expand_dims(tf.nn.embedding_lookup(ent_table, t), axis=-1)
        r_emb = tf.nn.embedding_lookup(self.model_weights["rel_emb"], r)
        r_proj = tf.nn.embedding_lookup(self.model_weights["rel_proj"], r)

        # e^T M_r leaves a unit axis at position -2. Squeeze only that axis so a batch of
        # one triplet, or a relation dimension of 1, is not collapsed as well.
        h_proj = tf.squeeze(tf.matmul(h_emb, r_proj, transpose_a=True), axis=-2)
        t_proj = tf.squeeze(tf.matmul(t_emb, r_proj, transpose_a=True), axis=-2)

        if self.constraint:
            # Constraint 2: clip the projected entities to L2-norm <= 1.
            h_proj = clip_constraint(X=h_proj, p=2, axis=-1, value=1)
            t_proj = clip_constraint(X=t_proj, p=2, axis=-1, value=1)

        return self.score_fn(h_proj + r_emb, t_proj)

    def _constraint_loss(self, X):
        """Perform constraint if necessary.

        When :code:`self.constraint` is true, the entity and relation embedding tables are
        overwritten in place with their row-wise L2-norm-clipped versions (threshold 1).
        No regularization term is produced.

        Parameters
        ----------
        X : batch_data
            batch data (not used by TransR)

        Returns
        -------
        int
            always :code:`0`, i.e. no regularization term is added
        """

        if self.constraint:
            for key in ("ent_emb", "rel_emb"):
                weight = self.model_weights[key]
                weight.assign(clip_constraint(X=weight, p=2, axis=-1, value=1))

        return 0