Source code for beta_rec.data.grocery_data

import os

import pandas as pd

from ..data.auxiliary_data import Auxiliary
from ..data.base_data import BaseData
from ..utils.common_util import ensureDir
from ..utils.triple_sampler import Sampler

pd.options.mode.chained_assignment = None  # default='warn'


[docs]class GroceryData(BaseData, Auxiliary):
    r"""A Grocery Data object, which consist one more order/basket column than the BaseData.

    Re-index all the users and items from raw dataset.

    Args:
        split_dataset (train,valid,test): the split dataset, a tuple consisting of training (DataFrame),
            validate/list of validate (DataFrame), testing/list of testing (DataFrame).
        intersect (bool, optional): remove users and items of test/valid sets that do not exist in the train set.
            If the model is able to predict for new users and new items, this can be :obj:`False`
            (default: :obj:`True`).
        binarize (bool, optional): binarize the rating column of train set 0 or 1, i.e. implicit feedback.
            (default: :obj:`True`).
        bin_thld (int, optional):  the threshold of binarization (default: :obj:`0`).
        normalize (bool, optional): normalize the rating column of train set into [0, 1], i.e. explicit feedback.
            (default: :obj:`False`).
    """

    def __init__(
        self,
        split_dataset,
        config=None,
        intersect=True,
        binarize=True,
        bin_thld=0.0,
        normalize=False,
    ):
        """Initialize GroceryData Class."""
        BaseData.__init__(
            self,
            split_dataset=split_dataset,
            intersect=intersect,
            binarize=binarize,
            bin_thld=bin_thld,
            normalize=normalize,
        )
        self.config = config
        Auxiliary.__init__(
            self, config=config, n_users=self.n_users, n_items=self.n_items
        )

[docs]    def sample_triple_time(self, dump=True, load_save=False):
        """Sample triples or load triples samples from files.

        This method is only applicable for basket based Recommender.

        Returns:
            None

        """
        sample_file_name = (
            "triple_"
            + self.config["dataset"]["dataset"]
            + (
                ("_" + str(self.config["dataset"]["percent"] * 100))
                if "percent" in self.config
                else ""
            )
            + (
                ("_" + str(self.config["model"]["time_step"]))
                if "time_step" in self.config
                else "_10"
            )
            + "_"
            + str(self.config["model"]["n_sample"])
            if "percent" in self.config
            else "" + ".csv"
        )
        self.process_path = self.config["system"]["process_dir"]
        ensureDir(self.process_path)
        sample_file = os.path.join(self.process_path, sample_file_name)
        my_sampler = Sampler(
            self.train,
            sample_file,
            self.config["model"]["n_sample"],
            dump=dump,
            load_save=load_save,
        )
        return my_sampler.sample_by_time(self.config["model"]["time_step"])

[docs]    def sample_triple(self, dump=True, load_save=False):
        """Sample triples or load triples samples from files.

        This method is only applicable for basket based Recommender.

        Returns:
            None

        """
        sample_file_name = (
            "triple_"
            + self.config["dataset"]["dataset"]
            + (
                ("_" + str(self.config["dataset"]["percent"] * 100))
                if "percent" in self.config
                else ""
            )
            + "_"
            + str(self.config["model"]["n_sample"])
            if "percent" in self.config
            else "" + ".csv"
        )
        self.process_path = self.config["system"]["process_dir"]
        ensureDir(self.process_path)
        sample_file = os.path.join(self.process_path, sample_file_name)
        my_sampler = Sampler(
            self.train,
            sample_file,
            self.config["model"]["n_sample"],
            dump=dump,
            load_save=load_save,
        )
        return my_sampler.sample()