# Source code for beta_rec.data.grocery_data

import os

import pandas as pd

from ..data.auxiliary_data import Auxiliary
from ..data.base_data import BaseData
from ..utils.common_util import ensureDir
from ..utils.triple_sampler import Sampler

# Turn off pandas' chained-assignment check (the SettingWithCopy warning).
# NOTE: this is a process-wide pandas option, set as a side effect of
# importing this module.
pd.options.mode.chained_assignment = None  # default='warn'


class GroceryData(BaseData, Auxiliary):
    r"""A Grocery Data object with one more order/basket column than BaseData.

    Re-index all the users and items from raw dataset.

    Args:
        split_dataset (train,valid,test): the split dataset, a tuple
            consisting of training (DataFrame), validate/list of validate
            (DataFrame), testing/list of testing (DataFrame).
        config (dict, optional): nested configuration. The sampling methods
            read ``config["dataset"]["dataset"]``, the optional
            ``config["dataset"]["percent"]``, ``config["model"]["n_sample"]``,
            ``config["model"]["time_step"]`` and
            ``config["system"]["process_dir"]``. (default: :obj:`None`).
        intersect (bool, optional): remove users and items of test/valid sets
            that do not exist in the train set. If the model is able to
            predict for new users and new items, this can be :obj:`False`
            (default: :obj:`True`).
        binarize (bool, optional): binarize the rating column of train set
            0 or 1, i.e. implicit feedback. (default: :obj:`True`).
        bin_thld (float, optional): the threshold of binarization
            (default: :obj:`0.0`).
        normalize (bool, optional): normalize the rating column of train set
            into [0, 1], i.e. explicit feedback. (default: :obj:`False`).
    """

    def __init__(
        self,
        split_dataset,
        config=None,
        intersect=True,
        binarize=True,
        bin_thld=0.0,
        normalize=False,
    ):
        """Initialize GroceryData Class."""
        BaseData.__init__(
            self,
            split_dataset=split_dataset,
            intersect=intersect,
            binarize=binarize,
            bin_thld=bin_thld,
            normalize=normalize,
        )
        self.config = config
        # Auxiliary is initialised after BaseData because it is given
        # self.n_users / self.n_items, which are not set in this class.
        Auxiliary.__init__(
            self, config=config, n_users=self.n_users, n_items=self.n_items
        )

    def _triple_file_name(self, with_time_step=False):
        """Build the file name used to cache sampled triples.

        The name has the shape
        ``triple_<dataset>[_<percent*100>][_<time_step>]_<n_sample>.csv``.

        The previous inline expression relied on ``a + b if c else d + e``,
        which Python parses as ``(a + b) if c else (d + e)``; as a result the
        name was either missing its ``.csv`` suffix or was just ``".csv"``.
        It also looked for ``percent`` / ``time_step`` in the top-level config
        although the values are read from the ``dataset`` / ``model``
        sections. Both problems are fixed here, so cache files written by the
        old code will not be found under the new name.

        Args:
            with_time_step (bool, optional): include the time-step component
                (falling back to ``10`` when ``config["model"]`` has no
                ``time_step``). (default: :obj:`False`).

        Returns:
            str: the file name, without any directory part.
        """
        dataset_cfg = self.config["dataset"]
        model_cfg = self.config["model"]

        parts = ["triple", str(dataset_cfg["dataset"])]
        if "percent" in dataset_cfg:
            parts.append(str(dataset_cfg["percent"] * 100))
        if with_time_step:
            parts.append(str(model_cfg.get("time_step", 10)))
        parts.append(str(model_cfg["n_sample"]))
        return "_".join(parts) + ".csv"

    def _make_sampler(self, sample_file_name, dump, load_save):
        """Create the Sampler bound to ``sample_file_name`` in process_dir.

        Sets ``self.process_path`` from ``config["system"]["process_dir"]``
        and calls ``ensureDir`` on it before building the sampler.
        """
        self.process_path = self.config["system"]["process_dir"]
        ensureDir(self.process_path)
        sample_file = os.path.join(self.process_path, sample_file_name)
        return Sampler(
            self.train,
            sample_file,
            self.config["model"]["n_sample"],
            dump=dump,
            load_save=load_save,
        )

    def sample_triple_time(self, dump=True, load_save=False):
        """Sample triples or load triples samples from files.

        This method is only applicable for basket based Recommender.

        Args:
            dump (bool, optional): forwarded to ``Sampler`` unchanged.
                (default: :obj:`True`).
            load_save (bool, optional): forwarded to ``Sampler`` unchanged.
                (default: :obj:`False`).

        Returns:
            The value returned by ``Sampler.sample_by_time`` (presumably the
            sampled triples; the exact type is defined by ``Sampler``).
        """
        sampler = self._make_sampler(
            self._triple_file_name(with_time_step=True), dump, load_save
        )
        # NOTE(review): the file name falls back to "10" when time_step is
        # missing, but this lookup still raises KeyError in that case, as it
        # always did. Confirm whether 10 should be the default here as well.
        return sampler.sample_by_time(self.config["model"]["time_step"])

    def sample_triple(self, dump=True, load_save=False):
        """Sample triples or load triples samples from files.

        This method is only applicable for basket based Recommender.

        Args:
            dump (bool, optional): forwarded to ``Sampler`` unchanged.
                (default: :obj:`True`).
            load_save (bool, optional): forwarded to ``Sampler`` unchanged.
                (default: :obj:`False`).

        Returns:
            The value returned by ``Sampler.sample`` (presumably the sampled
            triples; the exact type is defined by ``Sampler``).
        """
        sampler = self._make_sampler(
            self._triple_file_name(with_time_step=False), dump, load_save
        )
        return sampler.sample()