import numpy as np
np.set_printoptions(legacy='1.21')
import pandas as pd
from scipy import stats
import seaborn as sns
sns.set_theme()
import statsmodels.formula.api as smf

# Blood-pressure example: load the data set and show summary statistics.
df = pd.read_csv('blutdruckdaten.csv')
df.describe()

# Simple linear regression of Blutdruck on Alter.
formel = 'Blutdruck ~ Alter'

modell = smf.ols(formula=formel, data=df)
res = modell.fit()
res.summary()

# Query frame with a single row, Alter = 50.  The column values are passed
# as a list even though only one value is to be predicted.
anfrage = pd.DataFrame({'Alter': [50]})
anfrage

# Prediction for the query row, shown as a summary table.
res.get_prediction(exog=anfrage).summary_frame()

# Scatter plot with the fitted regression line; the Axes object is kept so
# that further lines can be drawn onto the same plot.
ax = sns.regplot(data=df, x='Alter', y='Blutdruck')
ax;

# Query frame covering the ages 18 through 69 (np.arange excludes the stop value 70).
anfrage = pd.DataFrame({'Alter': np.arange(18, 70)})
anfrage.head()

# One prediction row per queried age.
vorhersage = res.get_prediction(exog=anfrage).summary_frame()
vorhersage.head()

# Draw the obs_ci_lower and obs_ci_upper columns as orange lines on the
# existing regression plot.
sns.lineplot(x=anfrage['Alter'], y=vorhersage['obs_ci_lower'], ax=ax, color='orange')
sns.lineplot(x=anfrage['Alter'], y=vorhersage['obs_ci_upper'], ax=ax, color='orange')

<Axes: xlabel='Alter', ylabel='Blutdruck'>

# Show the figure that the Axes object belongs to.
ax.figure

# Same plot at the 99% level: ci=99 for the regression plot and alpha=0.01
# for the prediction summary, then the obs_ci bounds as orange lines.
ax = sns.regplot(data=df, x='Alter', y='Blutdruck', ci=99)
vorhersage = res.get_prediction(exog=anfrage).summary_frame(alpha=0.01)
sns.lineplot(x=anfrage['Alter'], y=vorhersage['obs_ci_lower'], ax=ax, color='orange')
sns.lineplot(x=anfrage['Alter'], y=vorhersage['obs_ci_upper'], ax=ax, color='orange');

# Fish example: load the data set and preview the first rows.
df = pd.read_csv('fische.csv')
df.head()

# Model 1: regress Gewicht on Monat.
formel1 = 'Gewicht ~ Monat'
modell1 = smf.ols(formula=formel1, data=df)

res = modell1.fit()
res.summary()

# Prediction for a single query row with Monat = 18.
anfrage = pd.DataFrame({'Monat': [18]})
res.get_prediction(exog=anfrage).summary_frame()

# NOTE(review): 1490.8 looks like a value copied by hand from the prediction
# table above, 1200 like a count, and the division by 1000 like a unit
# conversion -- confirm against the exercise text.
1200 * 1490.8 / 1000

1788.96

# Model 2: regress Höhe on Monat using the same data frame; `res` is
# rebound to this fit.
formel2 = 'Höhe ~ Monat'
modell2 = smf.ols(formula=formel2, data=df)
res = modell2.fit()
res.summary()

# Prediction from the new fit for the current query frame `anfrage`.
res.get_prediction(exog=anfrage).summary_frame()

# Galton example: load the data set and show summary statistics.
df = pd.read_csv('galton.csv')
df.describe()

# Count how often each value occurs in the gender column.
df['gender'].value_counts()

gender
male    481
Name: count, dtype: int64

# Regression with two explanatory variables: childHeight on father and mother.
formel = "childHeight ~ father + mother"
model = smf.ols(formula=formel, data=df)

res = model.fit()

res.summary()

# One query row: father = 68.9, mother = 66.4.
anfrage = pd.DataFrame({'father': [68.9], 'mother': [66.4]})
anfrage

# Prediction for the query row, shown as a summary table.
res.get_prediction(exog=anfrage).summary_frame()

# Regress midparentHeight on father and mother.  Formula, model and result
# get their own *_mph names, so `formel` and `res` are not overwritten.
formel_mph = "midparentHeight ~ father + mother"
modell_mph = smf.ols(formula=formel_mph, data=df)
res_mph = modell_mph.fit()

res_mph.summary()

	Alter	Blutdruck	Größe
count	30.000000	30.000000	30.000000
mean	44.800000	147.933333	176.373333
std	15.202994	27.391080	5.349504
min	17.000000	108.000000	165.300000
25%	37.250000	125.250000	173.350000
50%	45.000000	144.500000	176.650000
75%	56.000000	173.250000	179.075000
max	69.000000	199.000000	189.000000

Dep. Variable:	Blutdruck	R-squared:	0.701
Model:	OLS	Adj. R-squared:	0.690
Method:	Least Squares	F-statistic:	65.54
Date:	Tue, 17 Jun 2025	Prob (F-statistic):	8.17e-09
Time:	07:58:58	Log-Likelihood:	-123.27
No. Observations:	30	AIC:	250.5
Df Residuals:	28	BIC:	253.3
Df Model:	1
Covariance Type:	nonrobust

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	80.3697	8.798	9.135	0.000	62.348	98.391
Alter	1.5081	0.186	8.096	0.000	1.127	1.890

Omnibus:	2.886	Durbin-Watson:	2.401
Prob(Omnibus):	0.236	Jarque-Bera (JB):	1.526
Skew:	0.390	Prob(JB):	0.466
Kurtosis:	3.782	Cond. No.	149.

	mean	mean_se	mean_ci_lower	mean_ci_upper	obs_ci_lower	obs_ci_upper
0	107.515824	5.716491	95.806124	119.225524	74.152388	140.879261
1	109.023940	5.554540	97.645982	120.401899	75.775486	142.272395
2	110.532056	5.394160	99.482621	121.581491	77.394588	143.669525
3	112.040172	5.235496	101.315746	122.764599	79.009654	145.070690
4	113.548288	5.078708	103.145027	123.951550	80.620646	146.475931

Mathematik für Biologiestudierende II

Themen

Vorhersagen (prediction)

Vorhersagen der erwarteten Beobachtungen

Beispiel: Fische

Vorhersagen bei mehreren erklärenden Variablen

midparentHeight

	Monat	Höhe	Gewicht
0	4	18.361636	351.447570
1	18	82.891739	1489.847354
2	16	75.875655	1456.778649
3	5	24.685312	466.510582
4	23	102.858653	1992.998583

Dep. Variable:	Gewicht	R-squared:	0.992
Model:	OLS	Adj. R-squared:	0.992
Method:	Least Squares	F-statistic:	8866.
Date:	Tue, 17 Jun 2025	Prob (F-statistic):	9.01e-74
Time:	07:58:59	Log-Likelihood:	-365.66
No. Observations:	70	AIC:	735.3
Df Residuals:	68	BIC:	739.8
Df Model:	1
Covariance Type:	nonrobust

Omnibus:	3.720	Durbin-Watson:	1.931
Prob(Omnibus):	0.156	Jarque-Bera (JB):	3.556
Skew:	-0.198	Prob(JB):	0.169
Kurtosis:	4.030	Cond. No.	34.8

Dep. Variable:	Höhe	R-squared:	0.995
Model:	OLS	Adj. R-squared:	0.995
Method:	Least Squares	F-statistic:	1.474e+04
Date:	Tue, 17 Jun 2025	Prob (F-statistic):	3.13e-81
Time:	07:58:59	Log-Likelihood:	-143.68
No. Observations:	70	AIC:	291.4
Df Residuals:	68	BIC:	295.9
Df Model:	1
Covariance Type:	nonrobust

Omnibus:	3.746	Durbin-Watson:	1.917
Prob(Omnibus):	0.154	Jarque-Bera (JB):	3.670
Skew:	0.521	Prob(JB):	0.160
Kurtosis:	2.583	Cond. No.	34.8

	father	mother	midparentHeight	children	childNum	childHeight
count	481.000000	481.000000	481.000000	481.000000	481.000000	481.000000
mean	69.137630	64.033264	69.146778	6.008316	2.257796	69.234096
std	2.305767	2.321623	1.775891	2.669257	1.402404	2.623905
min	62.000000	58.000000	64.400000	1.000000	1.000000	60.000000
25%	68.000000	63.000000	68.020000	4.000000	1.000000	67.500000
50%	69.000000	64.000000	69.180000	6.000000	2.000000	69.200000
75%	70.500000	66.000000	70.140000	8.000000	3.000000	71.000000
max	78.500000	70.500000	75.430000	15.000000	10.000000	79.000000

Dep. Variable:	childHeight	R-squared:	0.238
Model:	OLS	Adj. R-squared:	0.235
Method:	Least Squares	F-statistic:	74.62
Date:	Tue, 17 Jun 2025	Prob (F-statistic):	6.25e-29
Time:	07:58:59	Log-Likelihood:	-1080.7
No. Observations:	481	AIC:	2167.
Df Residuals:	478	BIC:	2180.
Df Model:	2
Covariance Type:	nonrobust

Omnibus:	10.653	Durbin-Watson:	1.592
Prob(Omnibus):	0.005	Jarque-Bera (JB):	14.542
Skew:	-0.200	Prob(JB):	0.000695
Kurtosis:	3.752	Cond. No.	3.69e+03

Dep. Variable:	midparentHeight	R-squared:	1.000
Model:	OLS	Adj. R-squared:	1.000
Method:	Least Squares	F-statistic:	6.087e+26
Date:	Tue, 17 Jun 2025	Prob (F-statistic):	0.00
Time:	07:58:59	Log-Likelihood:	12557.
No. Observations:	481	AIC:	-2.511e+04
Df Residuals:	478	BIC:	-2.510e+04
Df Model:	2
Covariance Type:	nonrobust

Omnibus:	14.588	Durbin-Watson:	0.000
Prob(Omnibus):	0.001	Jarque-Bera (JB):	18.675
Skew:	0.299	Prob(JB):	8.80e-05
Kurtosis:	3.757	Cond. No.	3.69e+03

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	-2.6497	12.964	-0.204	0.839	-28.519	23.219
Monat	83.7359	0.889	94.161	0.000	81.961	85.510

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	-0.2723	0.544	-0.501	0.618	-1.358	0.813
Monat	4.5292	0.037	121.402	0.000	4.455	4.604

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	19.3128	4.095	4.716	0.000	11.266	27.359
father	0.4176	0.046	9.154	0.000	0.328	0.507
mother	0.3288	0.045	7.258	0.000	0.240	0.418

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	4.518e-13	1.99e-12	0.227	0.820	-3.46e-12	4.36e-12
father	0.5000	2.22e-14	2.26e+13	0.000	0.500	0.500
mother	0.5400	2.2e-14	2.45e+13	0.000	0.540	0.540