import numpy as np
import pandas as pd
import statsmodels.api as sm       
from statsmodels.formula.api import logit

from matplotlib import pyplot as plt
# Notebook-wide matplotlib defaults: a 16x8 figure size and a base font
# size of 14, applied in a single rcParams update.
plt.rcParams.update({
    "figure.figsize": (16, 8),
    "font.size": 14,
})


# Load the admissions data set straight from its public URL.
DATA_URL = "https://stats.idre.ucla.edu/stat/data/binary.csv"
df = pd.read_csv(DATA_URL)


# Store rank as strings so the formula interface dummy-codes it
# (rank[T.2], rank[T.3], rank[T.4]) instead of fitting one numeric slope.
# astype(str) does the cast in one vectorized call rather than a
# Python-level loop over every row.
df['rank'] = df['rank'].astype(str)


# Preview the first rows of the prepared frame.
df.head()


# Logistic regression of admission on GPA, GRE and rank.
# The specification is built first, then fitted by maximum likelihood.
admission_spec = logit(
    formula='admit ~ gpa + gre + rank',
    data=df,
)
model = admission_spec.fit()

fit_report = model.summary()
print(fit_report)

Optimization terminated successfully.
         Current function value: 0.573147
         Iterations 6
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  admit   No. Observations:                  400
Model:                          Logit   Df Residuals:                      394
Method:                           MLE   Df Model:                            5
Date:                Mon, 01 Feb 2021   Pseudo R-squ.:                 0.08292
Time:                        20:55:05   Log-Likelihood:                -229.26
converged:                       True   LL-Null:                       -249.99
Covariance Type:            nonrobust   LLR p-value:                 7.578e-08
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -3.9900      1.140     -3.500      0.000      -6.224      -1.756
rank[T.2]     -0.6754      0.316     -2.134      0.033      -1.296      -0.055
rank[T.3]     -1.3402      0.345     -3.881      0.000      -2.017      -0.663
rank[T.4]     -1.5515      0.418     -3.713      0.000      -2.370      -0.733
gpa            0.8040      0.332      2.423      0.015       0.154       1.454
gre            0.0023      0.001      2.070      0.038       0.000       0.004
==============================================================================


# Flag every coefficient whose p-value falls below the 5% level.
ALPHA = 0.05
model.pvalues.lt(ALPHA)

Intercept    True
rank[T.2]    True
rank[T.3]    True
rank[T.4]    True
gpa          True
gre          True
dtype: bool


# Odds ratios: exponentiate the fitted log-odds coefficients and show
# them to three decimal places.
odds_ratios = np.exp(model.params)
odds_ratios.round(3)

Intercept    0.019
rank[T.2]    0.509
rank[T.3]    0.262
rank[T.4]    0.212
gpa          2.235
gre          1.002
dtype: float64


# Average marginal effects, computed with get_margeff's default settings.
# NOTE(review): with the defaults the rank dummies appear to be treated
# like continuous regressors (dummy=False) -- confirm whether discrete
# changes (dummy=True) are wanted here.
marginal_effects = model.get_margeff()
marginal_effects.summary()


# Row 3 of the summary's header table carries the pseudo R-squared in its
# right-hand label/value pair; build the summary once and print both cells.
header_table = model.summary().tables[0]
pseudo_r2_row = header_table[3]
print(pseudo_r2_row[2], pseudo_r2_row[3])

  Pseudo R-squ.:      0.08292


# Percent correctly predicted (PCP): round each fitted probability to the
# nearest class (0 or 1), compare with the observed outcome, and express
# the share of matches as a percentage.
# The work is vectorized: np.round replaces the per-element round() loop
# (both round halves to even), and .mean() of the boolean matches replaces
# sum(...) / len(df). float() keeps the result a plain Python float.
predicted_class = np.round(model.predict())
float((predicted_class == df['admit']).mean() * 100)

71.0

	admit	gre	gpa	rank
0	0	380	3.61	3
1	1	660	3.67	3
2	1	800	4.00	1
3	1	640	3.19	4
4	0	520	2.93	4

	dy/dx	std err	z	P>\|z\|	[0.025	0.975]
rank[T.2]	-0.1314	0.060	-2.184	0.029	-0.249	-0.013
rank[T.3]	-0.2608	0.062	-4.176	0.000	-0.383	-0.138
rank[T.4]	-0.3019	0.076	-3.956	0.000	-0.451	-0.152
gpa	0.1564	0.063	2.485	0.013	0.033	0.280
gre	0.0004	0.000	2.107	0.035	3.07e-05	0.001

1. Predict admission from GPA, GRE score, and the rank of the university.

2. Interpret the coefficients. Which are significant? Which are positive/negative?

Significance

3. Assess how well the model fits the data. Find the pseudo-R² and the percent correctly predicted (PCP), along with the expected PCP (ePCP).