# Source code for beta_rec.utils.seq_evaluation

import numpy as np


def precision(ground_truth, prediction):
    """Compute Precision metric.

    Duplicated items are collapsed on both sides before counting, so the
    score is the fraction of *unique* predicted items that also appear in
    the ground truth.

    Args:
        ground_truth (List): the ground truth set or sequence. Each item must
            itself be an iterable of hashable values (e.g. a list of ids).
        prediction (List): the predicted set or sequence, same item format.

    Returns:
        precision_score (float): the value of the metric, in [0, 1]. It is
            0.0 when ``prediction`` is empty.
    """
    # Items are compared as tuples: this removes duplicates and makes the
    # membership test hash-based instead of a nested list scan.
    relevant = {tuple(item) for item in ground_truth}
    predicted = {tuple(item) for item in prediction}

    # An empty prediction has no correct items; return 0.0 rather than
    # dividing by zero.
    if not predicted:
        return 0.0

    precision_score = len(predicted & relevant) / float(len(predicted))
    return precision_score
def recall(ground_truth, prediction):
    """Compute Recall metric.

    Duplicated items are collapsed on both sides before counting, so the
    score is the fraction of *unique* ground-truth items that were predicted.

    Args:
        ground_truth (List): the ground truth set or sequence. Each item must
            itself be an iterable of hashable values (e.g. a list of ids).
        prediction (List): the predicted set or sequence, same item format.

    Returns:
        recall_score (float): the value of the metric, in [0, 1]. It is 0.0
            when either ``ground_truth`` or ``prediction`` is empty.
    """
    # Items are compared as tuples: this removes duplicates and makes the
    # membership test hash-based instead of a nested list scan.
    relevant = {tuple(item) for item in ground_truth}
    predicted = {tuple(item) for item in prediction}

    # Nothing to retrieve or nothing retrieved: the score is 0.0. Guarding
    # the empty ground truth also avoids a division by zero.
    if not relevant or not predicted:
        return 0.0

    recall_score = len(predicted & relevant) / float(len(relevant))
    return recall_score
def mrr(ground_truth, prediction):
    """Compute Mean Reciprocal Rank metric.

    The reciprocal rank is 0 if no predicted item is contained in the
    ground truth.

    Args:
        ground_truth (List): the ground truth set or sequence
        prediction (List): the predicted set or sequence

    Returns:
        rr (float): the value of the metric, i.e. ``1 / position`` of the
            first predicted item found in the ground truth (1-based).
    """
    # Lazily yields the reciprocal rank of every hit; only the first is used.
    reciprocal_ranks = (
        1.0 / position
        for position, candidate in enumerate(prediction, start=1)
        if candidate in ground_truth
    )
    return next(reciprocal_ranks, 0.0)
def ndcg(ground_truth, prediction):
    """Compute Normalized Discounted Cumulative Gain (NDCG) metric.

    Relevance is binary: a predicted item scores 1 if it is in the ground
    truth and 0 otherwise. The ideal DCG is the DCG obtained by moving the
    hits found in ``prediction`` to the top positions, so it depends on the
    number of hits, not on the size of ``ground_truth``.

    Args:
        ground_truth (List): the ground truth set or sequence.
        prediction (List): the predicted set or sequence.

    Returns:
        ndcg (float): the value of the metric; 0.0 when no predicted item is
            in the ground truth.
    """
    # 0-based positions of the predicted items that are relevant.
    hit_ranks = [
        position
        for position, candidate in enumerate(prediction)
        if candidate in ground_truth
    ]
    if not hit_ranks:
        return 0.0

    def discounted_sum(ranks):
        # Each hit has relevance 1, hence gain 2 ** 1 - 1 == 1; only the
        # logarithmic position discount varies.
        discounts = np.array([np.log2(rank + 2) for rank in ranks])
        return np.sum(1.0 / discounts)

    dcg = discounted_sum(hit_ranks)
    # Ideal ordering: the same number of hits packed at ranks 0..k-1.
    idcg = discounted_sum(range(len(hit_ranks)))
    return dcg / idcg
def count_a_in_b_unique(a, b):
    """Count how many elements of ``a`` are also present in ``b``.

    No deduplication happens here: an element repeated in ``a`` is counted
    once per occurrence.

    Args:
        a (List): list of lists.
        b (List): list of lists.

    Returns:
        count (int): number of elements of a in b.
    """
    return sum(1 for element in a if element in b)
def remove_duplicates(li):
    """Remove duplicated items in the list.

    Args:
        li (List): list of iterables whose elements are hashable.

    Returns:
        List: the distinct items of ``li``, each converted to a list. The
            order follows set iteration, not the input order.
    """
    # Lists are unhashable, so items go through tuples to be deduplicated.
    distinct = {tuple(entry) for entry in li}
    return list(map(list, distinct))