
import pandas as pd
import numpy as np

# Load the loan approval dataset
data = pd.read_csv('./loan_data.csv')

# Random 80/20 train/test split (no fixed seed, so results vary between runs)
train_data = data.sample(frac=0.8)
test_data = data.drop(train_data.index)
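For a repeatable split, the same thing can be done with scikit-learn. This is only a sketch and assumes scikit-learn is installed; the fixed random_state and the stratify option are additions, not part of the notebook above:

from sklearn.model_selection import train_test_split

# Fixed seed makes the split reproducible; stratifying on loan_status keeps the
# approved/rejected ratio the same in the train and test partitions.
train_data, test_data = train_test_split(
    data, test_size=0.2, random_state=42, stratify=data['loan_status'])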
discrete_features = ['person_home_ownership',
                     'person_gender',
                     'person_education',
                     'loan_intent',
                     'previous_loan_defaults_on_file',
                     'loan_status']
continuous_features = ['person_age',
                       'person_income',
                       'person_emp_exp',
                       'loan_amnt',
                       'loan_int_rate',
                       'loan_percent_income',
                       'cb_person_cred_hist_length',
                       'credit_score']
train_data[discrete_features].head(5)
|       | person_home_ownership | person_gender | person_education | loan_intent       | previous_loan_defaults_on_file | loan_status |
|-------|-----------------------|---------------|------------------|-------------------|--------------------------------|-------------|
| 17004 | OWN                   | male          | Bachelor         | EDUCATION         | Yes                            | 0           |
| 23005 | RENT                  | male          | High School      | EDUCATION         | No                             | 1           |
| 42571 | RENT                  | female        | High School      | MEDICAL           | No                             | 1           |
| 33151 | MORTGAGE              | male          | Bachelor         | DEBTCONSOLIDATION | Yes                            | 0           |
| 25229 | MORTGAGE              | female        | Associate        | DEBTCONSOLIDATION | No                             | 0           |
train_data[continuous_features].head(5)
|       | person_age | person_income | person_emp_exp | loan_amnt | loan_int_rate | loan_percent_income | cb_person_cred_hist_length | credit_score |
|-------|------------|---------------|----------------|-----------|---------------|---------------------|----------------------------|--------------|
| 17004 | 22.0       | 66454.0       | 0              | 8000.0    | 11.01         | 0.12                | 2.0                        | 679          |
| 23005 | 30.0       | 43541.0       | 10             | 8000.0    | 14.59         | 0.18                | 9.0                        | 644          |
| 42571 | 35.0       | 69434.0       | 12             | 6173.0    | 11.05         | 0.09                | 10.0                       | 476          |
| 33151 | 30.0       | 83584.0       | 4              | 20000.0   | 9.41          | 0.24                | 6.0                        | 665          |
| 25229 | 33.0       | 98797.0       | 10             | 9000.0    | 8.00          | 0.09                | 8.0                        | 708          |
# Split the training data by class label
approved_df = train_data[train_data['loan_status'] == 1]
rejected_df = train_data[train_data['loan_status'] == 0]

# Per-class categorical likelihoods P(value | class), estimated as relative frequencies
approved_discrete_features = {feature: approved_df[feature].value_counts(normalize=True)
                              for feature in discrete_features}
rejected_discrete_features = {feature: rejected_df[feature].value_counts(normalize=True)
                              for feature in discrete_features}

# Per-class Gaussian parameters (mean, standard deviation) for each continuous feature
approved_continuous_features = {feature: (approved_df[feature].mean(), approved_df[feature].std())
                                for feature in continuous_features}
rejected_continuous_features = {feature: (rejected_df[feature].mean(), rejected_df[feature].std())
                                for feature in continuous_features}

# Normal (Gaussian) probability density function
def normal_distribution(x, mean, std):
    return 1 / (std * np.sqrt(2 * np.pi)) * np.exp(-0.5 * ((x - mean) / std) ** 2)
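In equation form, the loop below computes a base-2 log-posterior for each class $y \in \{\text{approved}, \text{rejected}\}$ and predicts whichever class scores higher (the base of the logarithm does not change which class wins):

$$\log_2 P(y \mid x) \;=\; \log_2 P(y) \;+\; \sum_{i \,\in\, \text{discrete}} \log_2 P(x_i \mid y) \;+\; \sum_{j \,\in\, \text{continuous}} \log_2 \mathcal{N}\!\left(x_j \mid \mu_{j,y}, \sigma_{j,y}\right) \;-\; \log_2 P(x)$$

where $\mathcal{N}(x \mid \mu, \sigma) = \frac{1}{\sigma\sqrt{2\pi}}\, e^{-(x-\mu)^2 / 2\sigma^2}$ is the normal density defined above, and the final term $\log_2 P(x)$ is shared by both classes, so it can be dropped when comparing them. A categorical value never seen for a class in training would give $\log_2 0 = -\infty$; the code substitutes a fixed penalty of $-300$ instead.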
for idx, sample_data in test_data.iterrows():
    # Log-posterior (up to a shared constant) for each class
    likelihood_approved = 0
    likelihood_rejected = 0

    # Categorical features: add log P(value | class); unseen values get a large fixed penalty
    for feature in discrete_features:
        if feature != 'loan_status':
            try:
                likelihood_approved += np.log2(approved_discrete_features[feature][sample_data[feature]])
            except KeyError:
                likelihood_approved += -300

            try:
                likelihood_rejected += np.log2(rejected_discrete_features[feature][sample_data[feature]])
            except KeyError:
                likelihood_rejected += -300

    # Continuous features: add the log of the class-conditional Gaussian density
    for feature in continuous_features:
        (mean, stdev) = approved_continuous_features[feature]
        likelihood_approved += np.log2(normal_distribution(sample_data[feature], mean, stdev))

        (mean, stdev) = rejected_continuous_features[feature]
        likelihood_rejected += np.log2(normal_distribution(sample_data[feature], mean, stdev))

    # Class priors P(approved) and P(rejected), estimated from the training split
    likelihood_approved += np.log2(len(approved_df) / len(train_data))
    likelihood_rejected += np.log2(len(rejected_df) / len(train_data))

    # Predict the class with the higher log-posterior
    if likelihood_approved > likelihood_rejected:
        test_data.loc[idx, 'predicted_loan_status'] = 1
    else:
        test_data.loc[idx, 'predicted_loan_status'] = 0
### fraction of all test samples whose label is predicted correctly (accuracy)
accuracy = (test_data.loan_status == test_data.predicted_loan_status).sum()\
                /len(test_data)

### percentage of positives that are correctly predicted
sensitivity = (test_data[(test_data.loan_status == 1) & (test_data.predicted_loan_status == 1)].shape[0]\
                / test_data[test_data.loan_status == 1].shape[0])

### percentage of negatives that are correctly predicted
specificity = (test_data[(test_data.loan_status == 0) & (test_data.predicted_loan_status == 0)].shape[0]\
                / test_data[test_data.loan_status == 0].shape[0])
print(f"Precision: {precision:.4f}")
print(f"Sensitivity: {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")
Accuracy: 0.8829
Sensitivity: 0.7763
Specificity: 0.9145
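To see the four raw counts behind these rates in one place, a confusion table can be built with pandas. This is a minimal sketch; the crosstab call below is an addition, not part of the original notebook:

# Rows are the true labels, columns are the predicted labels
confusion = pd.crosstab(test_data['loan_status'],
                        test_data['predicted_loan_status'],
                        rownames=['actual'], colnames=['predicted'])
print(confusion)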