Source code for beta_rec.data.auxiliary_data

import numpy as np

from ..datasets.data_load import load_item_fea_dic
from ..utils.common_util import get_random_rep


class Auxiliary(object):
    r"""Auxiliary data object that builds feature matrices for users and items.

    Args:
        config (dict): nested configuration dict. Only three entries are read
            here, all optional: ``config["model"]["random_dim"]``,
            ``config["dataset"]["item_fea_type"]`` and
            ``config["dataset"]["user_fea_type"]``.
        n_users (int): number of users; row count of ``user_feature``.
        n_items (int): number of items; row count of ``item_feature``.

    Note:
        ``init_item_fea`` reads ``self.id2item`` for every feature type that
        is not purely random. This class never assigns that attribute, so the
        caller (or a subclass) has to provide it before calling the method.
    """

    # Width of randomly generated feature vectors when the config is silent.
    _DEFAULT_RANDOM_DIM = 512

    # Each recipe is (accepted names, feature sources to load, append random?).
    # Sources are concatenated in the listed order and the random block, when
    # requested, always comes last -- independent of the word order in the
    # configured name. Only the spellings listed here are accepted.
    _ITEM_FEATURE_RECIPES = (
        (("random",), (), True),
        (("one_hot",), ("one_hot",), False),
        (("word2vec",), ("word2vec",), False),
        (("bert",), ("bert",), False),
        (("random_one_hot", "one_hot_random"), ("one_hot",), True),
        (("random_cate", "cate_random"), ("cate",), True),
        (("random_word2vec", "word2vec_random"), ("word2vec",), True),
        (("random_bert", "bert_random"), ("bert",), True),
        (
            ("word2vec_one_hot", "one_hot_word2vec"),
            ("one_hot", "word2vec"),
            False,
        ),
        (
            (
                "word2vec_one_hot_random",
                "random_one_hot_word2vec",
                "one_hot_random_word2vec",
                "random_word2vec_one_hot",
            ),
            ("one_hot", "word2vec"),
            True,
        ),
        (
            (
                "word2vec_one_hot_bert",
                "bert_one_hot_word2vec",
                "one_hot_bert_word2vec",
            ),
            ("one_hot", "word2vec", "bert"),
            False,
        ),
        (
            (
                "random_word2vec_one_hot_bert",
                "bert_one_hot_word2vec_random",
                "one_hot_random_bert_word2vec",
                "one_hot_bert_random_word2vec",
                "one_hot_word2vec_bert_random",
                "random_one_hot_word2vec_bert",
            ),
            ("one_hot", "word2vec", "bert"),
            True,
        ),
    )

    def __init__(self, config, n_users, n_items):
        """Initialize Auxiliary Class.

        Feature matrices are not built here; call ``init_item_fea`` and
        ``init_user_fea`` explicitly once the object is set up.
        """
        self.config = config
        self.n_users = n_users
        self.n_items = n_items
        # The membership test and the lookup now target the same "model"
        # section, so a configured value is actually honoured.
        self.random_dim = self._config_value(
            "model", "random_dim", self._DEFAULT_RANDOM_DIM
        )

    def _config_value(self, section, key, default):
        """Return ``config[section][key]``, or ``default`` if any level is missing."""
        config = self.config
        if not config or section not in config:
            return default
        section_cfg = config[section]
        if not section_cfg or key not in section_cfg:
            return default
        return section_cfg[key]

    def _find_item_recipe(self, fea_type):
        """Return ``(sources, with_random)`` for ``fea_type``, or None if unsupported."""
        for names, sources, with_random in self._ITEM_FEATURE_RECIPES:
            if fea_type in names:
                return sources, with_random
        return None

    def init_item_fea(self):
        """Initialize item feature.

        Reads ``config["dataset"]["item_fea_type"]`` (default ``"random"``)
        and stores the resulting matrix in ``self.item_feature``. Unsupported
        type names print an error and fall back to random features.
        """
        fea_type = self._config_value("dataset", "item_fea_type", "random")
        recipe = self._find_item_recipe(fea_type)
        if recipe is None:
            print(
                "[ERROR]: CANNOT support feature type {}! intialize with random feature".format(
                    fea_type
                )
            )
            self.item_feature = get_random_rep(self.n_items, self.random_dim)
            return

        sources, with_random = recipe
        if not sources:
            # Pure random features: nothing to load from disk.
            self.item_feature = get_random_rep(self.n_items, self.random_dim)
            return

        # Draw the random block before loading anything, as the original
        # branches did, so the order of calls stays the same.
        random_part = None
        if with_random:
            random_part = get_random_rep(self.n_items, self.random_dim)

        fea_dicts = [
            load_item_fea_dic(self.config, fea_type=source) for source in sources
        ]
        # Map internal item indices to the keys used by the feature dicts once,
        # instead of once per feature source.
        item_keys = [self.id2item[k] for k in np.arange(self.n_items)]
        parts = [
            np.array([fea_dic[item_key] for item_key in item_keys])
            for fea_dic in fea_dicts
        ]
        if random_part is not None:
            parts.append(random_part)

        if len(parts) == 1:
            # A single source is stored as-is, without concatenation.
            self.item_feature = parts[0]
        else:
            self.item_feature = np.concatenate(tuple(parts), axis=1)

    def init_user_fea(self):
        """Initialize user feature for VBCAR model.

        Only random user features are implemented. Any other configured
        ``user_fea_type`` prints an error and still produces random features.
        """
        fea_type = self._config_value("dataset", "user_fea_type", "random")
        if fea_type != "random":
            print(
                "[ERROR]: CANNOT support other feature type, use 'random' user featrue instead!"
            )
        self.user_feature = get_random_rep(self.n_users, self.random_dim)