import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
set()
sns.%matplotlib inline
Understanding diagnostic tests
25 August 2021
This notebook explains the meaning of different diagnostic test statistics, how they are derived, and why they are used in different contexts.
Sensitivity and specificity can change due to spectrum bias (?)
Reference:
https://www.ncbi.nlm.nih.gov/books/NBK557491/
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2683447/
https://www.bmj.com/content/353/bmj.i3139
Useful functions
def positive_predictive_value(y, y_pred):
"""
Calculate the Positive Predictive Value (PPV)
from some predicted labels and true labels
This is the probability that a patient has a disease given that
they test positive for the disease.
"""
= confusion_matrix(y, y_pred).ravel()
tn, fp, fn, tp = tp / (tp + fp)
ppv return ppv
def negative_predictive_value(y, y_pred):
"""
Calculate the Negative Predictive Value (PPV)
from some predicted labels and true labels
This is the probability that a patient does not have the disease
given that they test negative for the disease.
"""
= confusion_matrix(y, y_pred).ravel()
tn, fp, fn, tp = tn / (tn + fn)
npv return npv
def sensitivity(y, y_pred):
"""
Calculate the sensitivity of a test from the predicted
and true labels
This is the probability that a patient with the disease
is correctly identified by the test
"""
= confusion_matrix(y, y_pred).ravel()
tn, fp, fn, tp = tp / (tp + fn)
sensitivity return sensitivity
def specificity(y, y_pred):
"""
Calculate the specificity of a test from the predicted
and true labels
This is the probability that a patient without the disease
is correctly identified by the test
"""
= confusion_matrix(y, y_pred).ravel()
tn, fp, fn, tp = tn / (tn + fp)
specificity return specificity
def calculate_scores(X):
# standardize the data so that x between 0 and 1
= X.min(), X.max()
x_min, x_max = x_max - x_min
x_range = (X - x_min) / x_range
X return X
def likelihood_ratios(y, y_pred):
= sensitivity(y, y_pred)
sens = specificity(y, y_pred)
spec
= sens / (1 - spec)
plr = (1 - sens) / spec
nlr
return plr, nlr
def generate_sample_data(population_size=2048, prevalence=0.1):
= int(prevalence * population_size)
num_positive = population_size - num_positive
num_negative
= 0.5*np.random.randn(num_negative)
x1 = 0.5*np.random.randn(num_positive) + 1
x2 = np.zeros(x1.shape)
y1 = np.zeros(x2.shape) + 1
y2
= np.concatenate([x1, x2])
X = np.concatenate([y1, y2])
y
return X, y
Generate some sample data
= generate_sample_data(population_size=8192, prevalence=0.1) X, y
= X[np.where(y == 0)]
X0 = X[np.where(y == 1)] X1
=(12, 8))
plt.figure(figsize=64, alpha=0.5)
plt.hist(X0, bins=64, alpha=0.5)
plt.hist(X1, bins plt.show()
= generate_sample_data(population_size=8192, prevalence=0.5)
X, y
= X[np.where(y == 0)]
X0 = X[np.where(y == 1)]
X1
=(12, 8))
plt.figure(figsize=64, alpha=0.5)
plt.hist(X0, bins=64, alpha=0.5)
plt.hist(X1, bins plt.show()
Define the test
Say that patient is positive if value > 1 and negative if < 1
= 0.75
threshold
= (X > threshold).astype(int)
y_pred = calculate_scores(X) y_score
Calculate stats from test
For a given threshold: - PPV, NPV - FP, FN, TP, TN - PLR, NLR, DOR - Sensitivity, Specificity
For arbitrary thresholds: - ROC, AUC
= confusion_matrix(y, y_pred).ravel()
tn, fp, fn, tp tn, fp, fn, tp
(3828, 268, 1228, 2868)
positive_predictive_value(y, y_pred), negative_predictive_value(y, y_pred)
(0.9145408163265306, 0.757120253164557)
sensitivity(y, y_pred), specificity(y, y_pred)
(0.7001953125, 0.9345703125)
= roc_curve(y, y_score)
fpr, tpr, thresholds = roc_auc_score(y, y_score) auc
=(12, 8))
plt.figure(figsize
plt.plot(fpr, tpr)'False positive rate')
plt.xlabel('True positive rate')
plt.ylabel(f'ROC curve (AUC = {np.round(auc, 2)})')
plt.title( plt.show()
Why are some of these statistics sensitive to the prevalence of the disease?
PPV, NPV, Sensitivity and Specificity are sensitivity to the prevalence of the disease, i.e., the proportion of the population that actually has the diease. PLR, NLR and DOR on the other hand, are not.
This example shows why this is the case.
# for a given threshold, how do the stats vary with different prevalence fractions?
= np.arange(0.05, 1, 1/20) prevalences
= []
stats
for prevalence in prevalences:
= generate_sample_data(population_size=2048, prevalence=prevalence)
X, y = 0.75
threshold = (X > threshold).astype(int)
y_pred = calculate_scores(X)
y_score = positive_predictive_value(y, y_pred), negative_predictive_value(y, y_pred)
ppv, npv = sensitivity(y, y_pred), specificity(y, y_pred)
sens, spec = confusion_matrix(y, y_pred).ravel()
tn, fp, fn, tp = likelihood_ratios(y, y_pred)
plr, nlr stats.append((tn, fp, fn, tp, ppv, npv, sens, spec, plr, nlr))
Positive and negative predictive values
These are affected by the prevalence of the disease; PPV increases with prevalence while NPV decreases.
Why is this?
- PPV = P(D | H_1) * N_1 / (P(D | H_1) * N_1 + P(D | H_0) * N_0)
- The probabilities do not change but there is a dependence on the absolute numbers of positive and negative cases
More intuitively (an example): - Given 1000 people and a prevalence of 5%, e.g., 50 out of 1000 are positive, 9950 are negative. - Suppose a false positive rate of 5% for test, e.g., 5% of 9950 are positive, e.g., around 498 test positive who are not. - What is the PPV? I.e., probability that if someone tests positive they are positive? - 50 / (498 + 50) probability of being positive.
# plot the stats vs prevalences
= list(zip(*stats))[0]
tns = list(zip(*stats))[1]
fps = list(zip(*stats))[2]
fns = list(zip(*stats))[3]
tps
=(12, 8))
plt.figure(figsize='False positive')
plt.plot(prevalences, fps, label='False negative')
plt.plot(prevalences, fns, label='True positive')
plt.plot(prevalences, tps, label='True negative')
plt.plot(prevalences, tns, label='right')
plt.legend(loc plt.show()
# plot the stats vs prevalences
= list(zip(*stats))[4]
ppvs = list(zip(*stats))[5] npvs
=(12, 8))
plt.figure(figsize=prevalences, height=ppvs, width=0.01, alpha=0.4, label='ppv')
plt.bar(x=prevalences, height=npvs, width=0.01, alpha=0.4, label='npv')
plt.bar(x='right')
plt.legend(loc plt.show()
Sensitivity and specificity
These are independent of the disease prevalence. Why?
These are proportions of each of the distributions (distribution of the positive cases and distribution of the negative cases)
Sensitivity: what proportion of positive cases are above a certain value (defined by the threshold)
Specificity: what proporiton of negative cases are below a certain value (defined by the threshold)
# plot the stats vs prevalences
= list(zip(*stats))[6]
sens = list(zip(*stats))[7]
spec
=(12, 8))
plt.figure(figsize='Sensitivity')
plt.plot(prevalences, sens, label='Specificity')
plt.plot(prevalences, spec, label='right')
plt.legend(loc plt.show()
Positive and negative likelihood ratios
These are not affected by the prevalence of the disease in the population as can be seen in the graph below.
What is the intuition for this? - Apply Bayes’ theorem to the sensitivity -> PPV - The sensitivity is independent of the prevalence, but P(D) (the probability of someone having a the disease, i.e., the prevalence) mutiplies this.
Positive Likelihood Ratio: True positive rate / false positive rate
= list(zip(*stats))[8]
plrs = list(zip(*stats))[9]
nlrs
=(12, 8))
plt.figure(figsize='Positive likelihood ratio')
plt.plot(prevalences, plrs, label='Negative likelihood ratio')
plt.plot(prevalences, nlrs, label='right')
plt.legend(loc plt.show()
Where disease is not dichotomous
The above is when the disease status can be defined in a binary way (either a person has the disease or does not) but some diseases are defined (?) in terms of the value of a measurement. E.g., hypertension being defined as systolic blood pressure being above 150mmHg.
For these cases, the sensitivity, specificity, likelihood ratios and many other measures vary with the prevalence of the disease in the population.
Simulated data
Following https://www.bmj.com/content/353/bmj.i3139, this can be shown by representing the prevalence of the disease within a population by the proportion of the population that is above a certain threshold.
How are false positives, negatives defined in this context?