import numpy as np
np.set_printoptions(legacy='1.21')
import seaborn as sns
sns.set_theme()
sns.set_context('talk')
import pandas as pd
from scipy import stats
import statsmodels.formula.api as smf  #   <-----  neu

# Galton example: multiple linear regression of childHeight on the
# two predictors father and mother.
galton = pd.read_csv('galton.csv')
galton.describe()

formel = 'childHeight ~ father + mother'
modell = smf.ols(formula=formel, data=galton)
res = modell.fit()
res.summary()

# Query frame for the prediction: one column per predictor in the formula.
# The values are always given as a list, even when only a single point
# is to be predicted.
anfrage = pd.DataFrame({'father': [70], 'mother': [69.8]})
anfrage

# Prediction table for the query point.
res.get_prediction(anfrage).summary_frame()

# Fish example: simple linear regression of Gewicht on Monat.
fische = pd.read_csv('fische.csv')
fische.describe()

formel1 = 'Gewicht ~ Monat'
modell1 = smf.ols(formel1, fische)

res = modell1.fit()
res.summary()

# Query frame with a single point, Monat = 18.
anfrage_fische = pd.DataFrame()
anfrage_fische['Monat'] = [18]

# Keep the prediction table in a variable so the follow-up computation can
# read its input from the fit instead of from a hand-copied number.
vorhersage_gewicht = res.get_prediction(anfrage_fische).summary_frame()
vorhersage_gewicht

# This used to be `1200 * 1490.8 / 1000` with 1490.8 typed in by hand.
# NOTE(review): 1490.8 matches the rounded lower 95 % bound of the *mean*
# prediction (column `mean_ci_lower`) at Monat = 18 for this fit -- confirm
# that the mean interval, not the observation interval, is the intended input.
# The factors 1200 and 1000 are presumably a number of fish and a g -> kg
# conversion -- confirm against the exercise text.
untergrenze_gewicht = vorhersage_gewicht['mean_ci_lower'].iloc[0]
1200 * untergrenze_gewicht / 1000

# Reference value obtained with the rounded bound 1490.8.
1788.96

# Same fish data, second response variable: regress Höhe on Monat.
formel2 = 'Höhe ~ Monat'
modell2 = smf.ols(formula=formel2, data=fische)
res_hoehe = modell2.fit()
res_hoehe.summary()

# Build the prediction for the query point once and summarise it twice.
vorhersage_hoehe = res_hoehe.get_prediction(anfrage_fische)

# Intervals at the default level (alpha = 0.05).
vorhersage_hoehe.summary_frame()

# Intervals at alpha = 0.01.
vorhersage_hoehe.summary_frame(alpha=0.01)

# Children example: linear model with an additional categorical predictor.
kinder = pd.read_csv('kinder.csv')
kinder.head()

# One regression line per gender; the trailing semicolon suppresses the
# notebook's textual output for the plot object.
sns.lmplot(data=kinder, x='father', y='childHeight', hue='gender');

formel = 'childHeight ~ father + mother + gender'
modell = smf.ols(formula=formel, data=kinder)
res = modell.fit()
res.summary()

# Extend the existing query frame in place with the categorical predictor,
# so that it has a column for every term of the new formula.
anfrage['gender'] = ['male']
anfrage

res.get_prediction(anfrage).summary_frame()

# Rat example: Belastung as a function of Alter and the categorical Gelände.
ratten = pd.read_csv('ratten.csv')

# One regression line per Gelände level; the trailing semicolon suppresses
# the notebook's textual output for the plot object.
sns.lmplot(data=ratten, x='Alter', y='Belastung', hue='Gelände');

# Shift Alter by 9 so that the model's intercept refers to Alter = 9
# instead of Alter = 0.
ratten['Differenz_zu_9'] = ratten['Alter'] - 9
ratten.head()

formel = "Belastung ~ Differenz_zu_9 + Gelände"
modell = smf.ols(formula=formel, data=ratten)
res = modell.fit()
res.summary()

	father	mother	midparentHeight	children	childNum	childHeight
count	481.000000	481.000000	481.000000	481.000000	481.000000	481.000000
mean	69.137630	64.033264	69.146778	6.008316	2.257796	69.234096
std	2.305767	2.321623	1.775891	2.669257	1.402404	2.623905
min	62.000000	58.000000	64.400000	1.000000	1.000000	60.000000
25%	68.000000	63.000000	68.020000	4.000000	1.000000	67.500000
50%	69.000000	64.000000	69.180000	6.000000	2.000000	69.200000
75%	70.500000	66.000000	70.140000	8.000000	3.000000	71.000000
max	78.500000	70.500000	75.430000	15.000000	10.000000	79.000000

Dep. Variable:	childHeight	R-squared:	0.238
Model:	OLS	Adj. R-squared:	0.235
Method:	Least Squares	F-statistic:	74.62
Date:	Tue, 03 Feb 2026	Prob (F-statistic):	6.25e-29
Time:	10:15:42	Log-Likelihood:	-1080.7
No. Observations:	481	AIC:	2167.
Df Residuals:	478	BIC:	2180.
Df Model:	2
Covariance Type:	nonrobust

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	19.3128	4.095	4.716	0.000	11.266	27.359
father	0.4176	0.046	9.154	0.000	0.328	0.507
mother	0.3288	0.045	7.258	0.000	0.240	0.418

Omnibus:	10.653	Durbin-Watson:	1.592
Prob(Omnibus):	0.005	Jarque-Bera (JB):	14.542
Skew:	-0.200	Prob(JB):	0.000695
Kurtosis:	3.752	Cond. No.	3.69e+03

	Monat	Höhe	Gewicht
count	70.000000	70.000000	70.000000
mean	13.228571	59.642117	1105.057055
std	6.169517	28.007176	518.587535
min	4.000000	15.810711	311.151315
25%	8.000000	35.186821	644.176651
50%	13.000000	56.927136	1087.083527
75%	19.000000	87.382391	1609.153006
max	23.000000	108.537802	1992.998583

Mathematik für Biologiestudierende

Wiederholung (interaktiv)

Themen

Vorhersagen (prediction)

Konfidenzintervalle für den Mittelwert bzw. für die erwarteten Beobachtungen

Beispiel: Fische

Lineare Modelle mit kategoriellen Daten

Prediction im kategoriellen Fall

Beispiel Ratten

Zweite Probeklausur

Omnibus:	3.720	Durbin-Watson:	1.931
Prob(Omnibus):	0.156	Jarque-Bera (JB):	3.556
Skew:	-0.198	Prob(JB):	0.169
Kurtosis:	4.030	Cond. No.	34.8

Omnibus:	3.746	Durbin-Watson:	1.917
Prob(Omnibus):	0.154	Jarque-Bera (JB):	3.670
Skew:	0.521	Prob(JB):	0.160
Kurtosis:	2.583	Cond. No.	34.8

	family	father	mother	midparentHeight	children	childNum	gender	childHeight
0	001	78.5	67.0	75.43	4	1	male	73.2
1	001	78.5	67.0	75.43	4	2	female	69.2
2	001	78.5	67.0	75.43	4	3	female	69.0
3	001	78.5	67.0	75.43	4	4	female	69.0
4	002	75.5	66.5	73.66	4	1	male	73.5

Omnibus:	11.156	Durbin-Watson:	1.549
Prob(Omnibus):	0.004	Jarque-Bera (JB):	15.397
Skew:	-0.114	Prob(JB):	0.000453
Kurtosis:	3.586	Cond. No.	3.63e+03

	Alter	Belastung	Gelände	Differenz_zu_9
0	10	63	unkontaminiert	1
1	12	67	unkontaminiert	3
2	6	55	unkontaminiert	-3
3	6	42	unkontaminiert	-3
4	11	73	unkontaminiert	2

Dep. Variable:	Belastung	R-squared:	0.673
Model:	OLS	Adj. R-squared:	0.635
Method:	Least Squares	F-statistic:	17.52
Date:	Tue, 03 Feb 2026	Prob (F-statistic):	7.42e-05
Time:	10:15:43	Log-Likelihood:	-63.935
No. Observations:	20	AIC:	133.9
Df Residuals:	17	BIC:	136.9
Df Model:	2
Covariance Type:	nonrobust

Omnibus:	1.119	Durbin-Watson:	2.547
Prob(Omnibus):	0.572	Jarque-Bera (JB):	0.408
Skew:	-0.346	Prob(JB):	0.815
Kurtosis:	3.102	Cond. No.	6.43

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	-2.6497	12.964	-0.204	0.839	-28.519	23.219
Monat	83.7359	0.889	94.161	0.000	81.961	85.510

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	-0.2723	0.544	-0.501	0.618	-1.358	0.813
Monat	4.5292	0.037	121.402	0.000	4.455	4.604

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	16.5212	2.727	6.058	0.000	11.169	21.873
gender[T.male]	5.2150	0.142	36.775	0.000	4.937	5.493
father	0.3928	0.029	13.699	0.000	0.337	0.449
mother	0.3176	0.031	10.245	0.000	0.257	0.378

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	71.1137	2.182	32.588	0.000	66.510	75.718
Gelände[T.unkontaminiert]	-11.0980	3.124	-3.552	0.002	-17.689	-4.507
Differenz_zu_9	3.5490	0.617	5.752	0.000	2.247	4.851