# Source code for beta_rec.utils.seq_evaluation

import numpy as np


def precision(ground_truth, prediction):
    """Compute Precision metric.

    Duplicates are ignored on both sides, so the score is the fraction of
    distinct predicted items that also occur in the ground truth.

    Args:
        ground_truth (List): the ground truth set or sequence; every item must
            be an iterable of hashable values (e.g. a list of ids).
        prediction (List): the predicted set or sequence, same item format.

    Returns:
        precision_score (float): the value of the metric, in [0, 1]. It is 0.0
            when ``prediction`` is empty.
    """
    # Items are turned into tuples so they can be de-duplicated in a set.
    relevant = {tuple(item) for item in ground_truth}
    predicted = {tuple(item) for item in prediction}
    if not predicted:
        # Nothing was predicted: return 0.0 instead of dividing by zero.
        return 0.0
    # The intersection size can never exceed len(predicted), so the result is
    # always within [0, 1].
    return len(predicted & relevant) / float(len(predicted))


def recall(ground_truth, prediction):
    """Compute Recall metric.

    Duplicates are ignored on both sides, so the score is the fraction of
    distinct ground truth items that also occur in the prediction.

    Args:
        ground_truth (List): the ground truth set or sequence; every item must
            be an iterable of hashable values (e.g. a list of ids).
        prediction (List): the predicted set or sequence, same item format.

    Returns:
        recall_score (float): the value of the metric, in [0, 1]. It is 0.0
            when either ``ground_truth`` or ``prediction`` is empty.
    """
    # Items are turned into tuples so they can be de-duplicated in a set.
    relevant = {tuple(item) for item in ground_truth}
    predicted = {tuple(item) for item in prediction}
    if not relevant or not predicted:
        # No hits are possible; this also avoids dividing by zero when the
        # ground truth is empty.
        return 0.0
    # The intersection size can never exceed len(relevant), so the result is
    # always within [0, 1].
    return len(predicted & relevant) / float(len(relevant))


def mrr(ground_truth, prediction):
    """Compute the Reciprocal Rank of the first correct prediction.

    The predictions are scanned in order; the score is ``1 / position``
    (1-based) of the first predicted item that is contained in the ground
    truth. It is 0.0 if no predicted item is contained in the ground truth.

    Args:
        ground_truth (List): the ground truth set or sequence
        prediction (List): the predicted set or sequence

    Returns:
        rr (float): the value of the metric
    """
    first_hit = next(
        (
            position
            for position, item in enumerate(prediction, start=1)
            if item in ground_truth
        ),
        None,
    )
    if first_hit is None:
        return 0.0
    return 1.0 / first_hit


def ndcg(ground_truth, prediction):
    """Compute Normalized Discounted Cumulative Gain (NDCG) metric.

    Relevance is binary: a predicted item counts as a hit when it is contained
    in the ground truth. The ideal DCG used for normalization places the hits
    found in ``prediction`` at the top positions, i.e. it depends on the
    number of hits in ``prediction``, not on the size of ``ground_truth``.

    Args:
        ground_truth (List): the ground truth set or sequence.
        prediction (List): the predicted set or sequence.

    Returns:
        ndcg (float): the value of the metric; 0.0 when no predicted item is
            contained in the ground truth.
    """
    # 0-based positions of the predicted items that are in the ground truth.
    hit_ranks = [
        position for position, item in enumerate(prediction) if item in ground_truth
    ]
    if not hit_ranks:
        return 0.0

    def discounted_gain(ranks):
        # With binary relevance every hit has gain 2 ** 1 - 1 == 1, so only
        # the logarithmic position discount remains.
        discounts = np.array([np.log2(position + 2) for position in ranks])
        return np.sum(1 / discounts)

    # Ideal ordering: the same number of hits, occupying the first positions.
    ideal_ranks = range(len(hit_ranks))
    return discounted_gain(hit_ranks) / discounted_gain(ideal_ranks)


def count_a_in_b_unique(a, b):
    """Count how many elements of ``a`` are contained in ``b``.

    Every element of ``a`` is checked on its own, so an element that occurs
    several times in ``a`` is counted each time it is found in ``b``.

    Args:
        a (List): list of lists.
        b (List): list of lists.

    Returns:
        count (int): number of elements of a in b.
    """
    return sum(1 for element in a if element in b)


def remove_duplicates(li):
    """Remove duplicated items in the list.

    Args:
        li (List): list whose items are iterables of hashable values
            (e.g. a list of lists of ids).

    Returns:
        List: a new list holding each distinct item once, converted to a
            ``list``, in order of first occurrence.
    """
    # Tuples are hashable, so they can serve as dict keys; dict.fromkeys keeps
    # the first occurrence of each key in insertion order, which makes the
    # result deterministic (set iteration order is arbitrary).
    return [list(item) for item in dict.fromkeys(tuple(x) for x in li)]