Here is how you can calculate accuracy, precision, recall, and F1-score for binary classification predictions — a plain-vanilla implementation in pure Python:
# Based on this discussion:
# https://stackoverflow.com/questions/14117997/what-does-recall-mean-in-machine-learning
def calculate_recall_precision(label, prediction):
    """Compute accuracy, precision, recall, and F1-score for binary labels.

    Parameters
    ----------
    label : sequence of int
        Ground-truth labels (0 or 1).
    prediction : sequence of int
        Predicted labels (0 or 1), same length as ``label``.

    Returns
    -------
    tuple of float
        ``(accuracy, precision, recall, f1_score)``.  Precision, recall,
        and F1 are defined as 0.0 when their denominator is zero (the
        same convention as scikit-learn's ``zero_division=0``), instead
        of raising ``ZeroDivisionError``.
    """
    true_positives = 0
    false_positives = 0
    true_negatives = 0
    false_negatives = 0
    # Walk both sequences in lockstep and tally the confusion matrix.
    for truth, pred in zip(label, prediction):
        if pred == 1:
            if pred == truth:
                true_positives += 1
            else:
                false_positives += 1
        else:
            if pred == truth:
                true_negatives += 1
            else:
                false_negatives += 1

    total = true_positives + true_negatives + false_positives + false_negatives
    # Accuracy: ratio of correctly predicted observations to all observations.
    accuracy = (true_positives + true_negatives) / total

    # Precision: "how useful the search results are".
    # Guard against division by zero when nothing was predicted positive.
    predicted_positives = true_positives + false_positives
    precision = true_positives / predicted_positives if predicted_positives else 0.0

    # Recall: "how complete the results are".
    # Guard against division by zero when there are no actual positives.
    actual_positives = true_positives + false_negatives
    recall = true_positives / actual_positives if actual_positives else 0.0

    # F1: harmonic mean of precision and recall; 0.0 when both are zero.
    denom = precision + recall
    f1_score = (2 * precision * recall / denom) if denom else 0.0

    return accuracy, precision, recall, f1_score


# Usage example:
y_true = [1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1]
accuracy, precision, recall, f1_score = calculate_recall_precision(y_true, y_pred)
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 score: ", f1_score)
# outputs:
# Accuracy:  0.6
# Precision:  1.0
# Recall:  0.5
# F1 score:  0.6666666666666666
And here is the same result using the scikit-learn library (which offers more flexibility for computing these metrics):
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Same example data as the pure-Python version above, repeated here so the
# snippet is self-contained and runnable on its own.
y_true = [1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1]

accuracy = accuracy_score(y_true, y_pred)
# average='binary' reports metrics for the positive class (label 1) only;
# the fourth return value (support) is not needed here.
precision, recall, f1_score, _ = precision_recall_fscore_support(
    y_true, y_pred, average='binary'
)
print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1 score: ", f1_score)
# outputs:
# Accuracy:  0.6
# Precision:  1.0
# Recall:  0.5
# F1 score:  0.666666666667