Here is how you can calculate accuracy, precision, recall, and F1-score for binary classification predictions — a plain-vanilla implementation in Python:
# Method based on this discussion:
# https://stackoverflow.com/questions/14117997/what-does-recall-mean-in-machine-learning
def calculate_recall_precision(label, prediction):
    """Compute accuracy, precision, recall and F1-score for binary labels.

    Args:
        label: sequence of ground-truth labels (0 or 1).
        prediction: sequence of predicted labels (0 or 1), same length as
            ``label``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1_score)``. Any metric whose
        denominator is zero (e.g. precision when nothing was predicted
        positive, or accuracy on empty input) is reported as 0.0 instead of
        raising ZeroDivisionError, matching scikit-learn's
        ``zero_division=0`` behaviour.
    """
    true_positives = 0
    false_positives = 0
    true_negatives = 0
    false_negatives = 0
    # Tally the four confusion-matrix cells in a single pass.
    for truth, pred in zip(label, prediction):
        if pred == 1:
            if pred == truth:
                true_positives += 1
            else:
                false_positives += 1
        else:
            if pred == truth:
                true_negatives += 1
            else:
                false_negatives += 1
    # Accuracy: ratio of correctly predicted observations to all observations.
    total = true_positives + true_negatives + false_positives + false_negatives
    accuracy = (true_positives + true_negatives) / total if total else 0.0
    # Precision: "how useful the positive predictions are".
    predicted_positives = true_positives + false_positives
    precision = true_positives / predicted_positives if predicted_positives else 0.0
    # Recall: "how complete the positive predictions are".
    actual_positives = true_positives + false_negatives
    recall = true_positives / actual_positives if actual_positives else 0.0
    # F1 is the harmonic mean of precision and recall, written in its
    # division-safe form so precision == recall == 0 does not crash.
    denominator = precision + recall
    f1_score = 2 * precision * recall / denominator if denominator else 0.0
    return accuracy, precision, recall, f1_score
# Example: five ground-truth labels against five predictions.
y_true = [1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1]
metrics = calculate_recall_precision(y_true, y_pred)
accuracy, precision, recall, f1_score = metrics
# Print each metric next to its heading.
for heading, value in zip(
    ("Accuracy: ", "Precision: ", "Recall: ", "F1 score: "), metrics
):
    print(heading, value)
# Expected output:
# Accuracy:  0.6
# Precision:  1.0
# Recall:  0.5
# F1 score:  0.6666666666666666
And here is the same result computed with the scikit-learn library (which offers more flexibility in how these metrics are calculated):
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# accuracy_score handles accuracy; precision_recall_fscore_support returns
# precision, recall, F1 and support in one call (support is unused here).
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1_score, _support = precision_recall_fscore_support(y_true, y_pred, average='binary')
for heading, value in zip(
    ("Accuracy: ", "Precision: ", "Recall: ", "F1 score: "),
    (accuracy, precision, recall, f1_score),
):
    print(heading, value)
# Expected output:
# Accuracy:  0.6
# Precision:  1.0
# Recall:  0.5
# F1 score:  0.666666666667