Source code for beta_rec.models.vlml

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class VariableLengthMemoryLayer(nn.Module):
    """VariableLengthMemoryLayer Class."""

    def __init__(self, hops, emb_dim, device):
        """Initialize VariableLengthMemoryLayer Class."""
        super(VariableLengthMemoryLayer, self).__init__()
        self.hops = hops
        self.device = device
        self.emb_dim = emb_dim
        self.hop_mapping = {}
        # One linear mapping per hop after the first (keys "1" ... "hops-1").
        for h in range(hops - 1):
            self.hop_mapping[str(h + 1)] = nn.Linear(
                self.emb_dim, self.emb_dim, bias=True
            )
            self.hop_mapping[str(h + 1)].weight.requires_grad = True
            self.hop_mapping[str(h + 1)].bias.requires_grad = True
            nn.init.kaiming_normal_(self.hop_mapping[str(h + 1)].weight)
            self.hop_mapping[str(h + 1)].bias.data.fill_(1.0)
        self.hop_mapping = nn.ModuleDict(self.hop_mapping)

    def mask_mod(self, inputs, mask_length, maxlen=None):
        """Use a memory mask.

        Apply a memory mask such that the masked values end up at the minimum
        possible value representable with a float32.

        :param inputs: [batch size, length] float32 Tensor of scores.
        :param mask_length: [batch size] Tensor of ints indicating the true
            length of each sequence in ``inputs``.
        :param maxlen: Sets the maximum length of the sequence; if None,
            inferred from inputs.
        :returns: [batch size, length] Tensor with the mask applied.
        """
        # [batch_size, length] => sequence mask: True for valid positions.
        memory_mask = torch.arange(maxlen).to(self.device).expand(
            len(mask_length), maxlen
        ) < mask_length.unsqueeze(1)
        memory_mask = memory_mask.float()
        # num_remaining_memory_slots = torch.sum(memory_mask, 1)
        # Get the numerical limits of a float32.
        finfo = np.finfo(np.float32)
        kept_indices = memory_mask
        ignored_indices = memory_mask < 1
        ignored_indices = ignored_indices.float()
        # Kept positions get the float32 max (so the scores pass through the
        # elementwise min below); ignored positions get the float32 min.
        lower_bound = finfo.max * kept_indices + finfo.min * ignored_indices
        slice_length = torch.max(mask_length)
        # Return the elementwise minimum of the scores and the bound.
        return torch.min(inputs[:, :slice_length], lower_bound[:, :slice_length])
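
The masking above relies on a standard sequence-mask trick: broadcasting ``torch.arange(maxlen)`` against the batch of lengths marks which slots are real and which are padding. A minimal sketch of that trick outside the module, on CPU and with illustrative values only:

import torch

lengths = torch.tensor([2, 4])  # true lengths of two sequences
maxlen = 5                      # padded length
# arange(maxlen) compared against the lengths gives True for valid slots.
mask = torch.arange(maxlen).expand(len(lengths), maxlen) < lengths.unsqueeze(1)
print(mask)
# tensor([[ True,  True, False, False, False],
#         [ True,  True,  True,  True, False]])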

    def apply_attention_memory(
        self, memory, output_memory, query, memory_mask=None, maxlen=None
    ):
        """Apply attention over the memory.

        :param memory: [batch size, max length, embedding size], typically Matrix M.
        :param output_memory: [batch size, max length, embedding size], typically Matrix C.
        :param query: [batch size, embed size], typically u.
        :param memory_mask: [batch size] dim Tensor, the length of each
            sequence if variable length.
        :param maxlen: int/Tensor, the maximum sequence padding length; if
            None it infers based on the max of memory_mask.
        :returns: A dict with
            output: [batch size, embedding size],
            weight: [batch size, max length], the attention weights applied
            to the output representation.
        """
        # Batched dot product between the query and every memory slot.
        query_expanded = query.unsqueeze(-1).transpose(2, 1)  # [batch, 1, embed]
        batched_dot_prod = query_expanded * memory            # [batch, length, embed]
        scores = batched_dot_prod.sum(2)                       # [batch, length]
        if memory_mask is not None:
            scores = self.mask_mod(scores, memory_mask, maxlen)
        attention = F.softmax(scores, dim=-1)
        # Weighted sum of the output memory using the attention weights.
        probs_temp = attention.unsqueeze(1)                    # [batch, 1, length]
        c_temp = output_memory.transpose(2, 1)                 # [batch, embed, length]
        neighborhood = c_temp * probs_temp
        weighted_output = neighborhood.sum(2)                  # [batch, embed]
        return {"weight": attention, "output": weighted_output}
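
The score computation above is a multiply-and-sum formulation of a batched dot product between the query and each memory slot; it is equivalent to an einsum over the embedding dimension. A small sketch checking that equivalence (the shapes below are illustrative assumptions):

import torch

batch, length, dim = 3, 4, 8
query = torch.randn(batch, dim)
memory = torch.randn(batch, length, dim)

scores_a = (query.unsqueeze(-1).transpose(2, 1) * memory).sum(2)  # as in the layer
scores_b = torch.einsum("bd,bld->bl", query, memory)              # equivalent form
print(torch.allclose(scores_a, scores_b))  # True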

    def forward(self, query, memory, output_memory, seq_length, maxlen=32):
        """Compute the multi-hop memory output for a batch of queries."""
        # Find the maximum sequence length in this batch.
        cur_max = torch.max(seq_length).item()
        # Slice the memories to that maximum length.
        memory = memory[:, :cur_max]
        output_memory = output_memory[:, :cur_max]
        user_query, item_query = query
        hop_outputs = []
        # hop 0: z = m_u + e_i
        z = user_query + item_query
        for hop_k in range(self.hops):
            if hop_k == 0:
                memory_hop = self.apply_attention_memory(
                    memory, output_memory, z, seq_length, maxlen
                )
            else:
                # hop 1, ..., hop self.hops - 1: update the query, then
                # apply attention again.
                z = F.relu(self.hop_mapping[str(hop_k)](z) + memory_hop["output"])
                memory_hop = self.apply_attention_memory(
                    memory, output_memory, z, seq_length, maxlen
                )
            hop_outputs.append(memory_hop)
        return hop_outputs
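
A minimal end-to-end sketch of how the layer might be instantiated and called, assuming ``VariableLengthMemoryLayer`` from this module is in scope; the tensor shapes and hyper-parameters below are illustrative assumptions, not values taken from the repository:

import torch

layer = VariableLengthMemoryLayer(hops=2, emb_dim=8, device=torch.device("cpu"))

batch, maxlen, dim = 4, 32, 8
memory = torch.randn(batch, maxlen, dim)         # Matrix M
output_memory = torch.randn(batch, maxlen, dim)  # Matrix C
user_query = torch.randn(batch, dim)             # m_u
item_query = torch.randn(batch, dim)             # e_i
seq_length = torch.tensor([3, 10, 7, 32])        # true length of each sequence

hop_outputs = layer((user_query, item_query), memory, output_memory, seq_length)
print(hop_outputs[-1]["output"].shape)  # torch.Size([4, 8])
print(hop_outputs[-1]["weight"].shape)  # torch.Size([4, 32])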