import numpy as np
np.set_printoptions(legacy='1.21')
import seaborn as sns
sns.set_theme()
sns.set_context('talk')
import pandas as pd
from scipy import stats
import statsmodels.formula.api as smf  #   <-----  neu

# Read the lemon data (zitronen.csv) from the given URL.
zitronen_url = "http://reh.math.uni-duesseldorf.de/~braun/bio2425/zitronen.csv"
zitronen = pd.read_csv(zitronen_url)

# Preview the first rows.
zitronen.head()

# Count how often each value of the column "Land" occurs.
zitronen["Land"].value_counts()

Land
Spanien         8
Italien         8
Griechenland    8
Marokko         8
Indien          8
Name: count, dtype: int64

import statsmodels.stats.anova as smf_anova

# ANOVA written as a linear model: the response on the left of "~",
# the grouping column on the right.
formel = "Vitamin_C_Gehalt ~ Land"
modell = smf.ols(formula=formel, data=zitronen)
res = modell.fit()

# ANOVA table (df, sums of squares, F statistic, p-value) of the fitted model.
tabelle = smf_anova.anova_lm(res)
tabelle

# p-value of the F-test in the row "Land".
tabelle.loc['Land', 'PR(>F)']

3.3733416696760257e-06

from statsmodels.sandbox.stats.multicomp import MultiComparison

# Group the vitamin C measurements by the column "Land".
muc = MultiComparison(zitronen["Vitamin_C_Gehalt"], zitronen["Land"])

# Two-sample t-test for every pair of groups; p-values corrected with
# Holm's method. The first element of the result is the summary table.
multitest = muc.allpairtest(stats.ttest_ind, method='holm')
multitest[0]

tabelle

# Effect size eta squared: sum of squares of "Land" divided by the sum of
# the "Land" and "Residual" sums of squares.
ss_land = tabelle['sum_sq']['Land']
ss_residual = tabelle['sum_sq']['Residual']
eta2 = ss_land / (ss_land + ss_residual)
eta2

0.5757320234157529

# Read the data from the local file corona.csv and preview it.
corona = pd.read_csv('corona.csv')
corona.head()

# Scatter plot of "Anzahl" against the day column.
ax = sns.scatterplot(x="Tag (im März)", y="Anzahl", data=corona)

# Switch the y-axis of that plot to a logarithmic scale and show the figure.
ax.set_yscale('log')
ax.figure

# Add the natural logarithm of the counts and a copy of the day column
# under the plain name "Tag", which is the name used in the formula below.
corona['logAnzahl'] = np.log(corona['Anzahl'])
corona['Tag'] = corona['Tag (im März)']

# Ordinary least squares fit of the log counts on the day.
formel = 'logAnzahl ~ Tag'
modell = smf.ols(formula=formel, data=corona)
res = modell.fit()

res.summary()

# Slope and intercept of the regression line, entered as rounded constants.
steigung = 0.226
achsenabschnitt = 3.441

# Days 3..23 and the corresponding values on the regression line.
tage = np.arange(3, 24)
gerade = steigung*tage + achsenabschnitt

# Log-transformed data together with the line, drawn on the same axes.
titel = "Die logarithmierten Daten zusammen mit ihrer Regressionsgerade"
ax2 = sns.scatterplot(x=corona.Tag, y=corona.logAnzahl)
sns.lineplot(x=tage, y=gerade, ax=ax2)
ax2.set_title(titel);

# Back-transform the line with exp() and add it to the earlier plot `ax`.
titel = "Die exponentierte Regressionskurve in halblogarithmischer Darstellung"
kurve = np.exp(gerade)
sns.lineplot(x=tage, y=kurve, ax=ax)
ax.set_title(titel)
ax.figure

# Same figure again with a linear y-axis and a matching title.
titel = "Die exponentierte Regressionskurve in linearer Darstellung"
ax.set_yscale('linear')
ax.set_title(titel)
ax.figure

# Growth rate m (slope on the log scale), entered as a rounded constant.
m = 0.226

# Doubling time: ln(2) / m.
ln2 = np.log(2)
t = ln2 / m
t

3.067022922831616

# Regression plot with x="Anzahl" and y=day; logx=True asks seaborn for a
# fit of the form y ~ log(x).
sns.regplot(data=corona, x="Anzahl", y="Tag (im März)", logx=True);

# Load seaborn's "titanic" example data set and preview it.
titanic = sns.load_dataset('titanic')
titanic.head()

# Logistic regression of "survived" on sex, age, embarkation town and
# passenger class; C(...) makes pclass enter as a categorical variable.
formel = "survived ~ sex + age + embark_town + C(pclass)"

modell = smf.logit(formula=formel, data=titanic)
res = modell.fit()

Optimization terminated successfully.
         Current function value: 0.451318
         Iterations 6

# Show the summary (fit statistics and coefficient table) of the fitted model.
res.summary()

# Three example rows to predict for, with one column per predictor
# used in the model formula.
anfrage = pd.DataFrame({
    'sex': ['male', 'male', 'female'],
    'embark_town': ['Southampton'] * 3,
    'age': [35, 45, 45],
    'pclass': [1, 2, 3],
})
anfrage

# Prediction for each row of `anfrage`, returned as a table with the columns
# predicted, se, ci_lower and ci_upper.
res.get_prediction(anfrage).summary_frame()

# Read the local file bsp_logit.csv and preview it.
kl = pd.read_csv('bsp_logit.csv')
kl.head()

# Logistic regression of the 0/1 column "bestanden" on "ksp".
formel = "bestanden ~ ksp"
modell = smf.logit(formula=formel, data=kl)
res = modell.fit()

Optimization terminated successfully.
         Current function value: 0.480427
         Iterations 6

# Show the summary (fit statistics and coefficient table) of the fitted model.
res.summary()

# Predicted values of the model on a grid of 50 ksp values between 0 and 6.
anfrage = pd.DataFrame({'ksp': np.linspace(0, 6, 50)})
vorhersage = res.get_prediction(anfrage).summary_frame()
sns.lineplot(x=anfrage['ksp'], y=vorhersage['predicted']);

# Same plot on a wider grid: 150 ksp values between -10 and 15.
anfrage = pd.DataFrame({'ksp': np.linspace(-10, 15, 150)})
vorhersage = res.get_prediction(anfrage).summary_frame()
sns.lineplot(x=anfrage['ksp'], y=vorhersage['predicted']);

# Scatter of "bestanden" against "ksp" with a logistic regression fit;
# the points are jittered in both directions and drawn with the '.' marker.
sns.regplot(
    data=kl,
    x='ksp',
    y='bestanden',
    logistic=True,
    x_jitter=0.2,
    y_jitter=0.08,
    marker='.',
);

	df	sum_sq	mean_sq	F	PR(>F)
Land	4.0	4378.44650	1094.611625	11.873758	0.000003
Residual	35.0	3226.56125	92.187464	NaN	NaN

group1	group2	stat	pval	pval_corr	reject
Griechenland	Indien	-4.9524	0.0002	0.0019	True
Griechenland	Italien	1.113	0.2845	0.5689	False
Griechenland	Marokko	-3.5339	0.0033	0.0231	True
Griechenland	Spanien	-1.9478	0.0718	0.2153	False
Indien	Italien	6.2008	0.0	0.0002	True
Indien	Marokko	0.3183	0.7549	0.7549	False
Indien	Spanien	3.3226	0.005	0.0302	True
Italien	Marokko	-4.3312	0.0007	0.0055	True
Italien	Spanien	-3.3042	0.0052	0.0302	True
Marokko	Spanien	2.2786	0.0389	0.1556	False

	df	sum_sq	mean_sq	F	PR(>F)
Land	4.0	4378.44650	1094.611625	11.873758	0.000003
Residual	35.0	3226.56125	92.187464	NaN	NaN

$\eta^2$-Wert	Interpretation
0.01	geringer Effekt
0.06	mittlerer Effekt
0.14	starker Effekt

Dep. Variable:	logAnzahl	R-squared:	0.960
Model:	OLS	Adj. R-squared:	0.958
Method:	Least Squares	F-statistic:	458.1
Date:	Tue, 03 Feb 2026	Prob (F-statistic):	9.25e-15
Time:	10:19:40	Log-Likelihood:	-2.9636
No. Observations:	21	AIC:	9.927
Df Residuals:	19	BIC:	12.02
Df Model:	1
Covariance Type:	nonrobust

Mathematik für Biologiestudierende¶

Wiederholung (interaktiv)¶

Themen¶

ANOVA als Lineares Modell¶

Effektstärke bei der ANOVA¶

Interpretation der Effektstärke für Cohen's $\eta^2$¶

Regression im exponentiellen Modell¶

Beispiel Covid-Erkrankungen¶

Halblogarithmische Darstellung¶

Exponentielles Modell vs. Lineare Regression¶

Regression im exponentiellen Modell¶

Halbwerts- bzw. Verdoppelungszeit¶

Im Beispiel Covid¶

Logistische Regression¶

Beispiel Klausurergebnis und Übungspunkte¶

Morgen Probeklausur¶

	Vitamin_C_Gehalt	Land
0	494.5	Spanien
1	499.2	Spanien
2	494.3	Spanien
3	478.0	Spanien
4	500.1	Spanien

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	3.4410	0.151	22.728	0.000	3.124	3.758
Tag	0.2260	0.011	21.403	0.000	0.204	0.248

Omnibus:	0.968	Durbin-Watson:	1.597
Prob(Omnibus):	0.616	Jarque-Bera (JB):	0.816
Skew:	-0.438	Prob(JB):	0.665
Kurtosis:	2.594	Cond. No.	34.1

	survived	pclass	sex	age	sibsp	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True

Dep. Variable:	survived	No. Observations:	712
Model:	Logit	Df Residuals:	705
Method:	MLE	Df Model:	6
Date:	Tue, 03 Feb 2026	Pseudo R-squ.:	0.3312
Time:	10:19:41	Log-Likelihood:	-321.34
converged:	True	LL-Null:	-480.45
Covariance Type:	nonrobust	LLR p-value:	1.013e-65

	coef	std err	z	P>\|z\|	[0.025	0.975]
Intercept	4.0368	0.431	9.371	0.000	3.193	4.881
sex[T.male]	-2.5158	0.209	-12.020	0.000	-2.926	-2.106
embark_town[T.Queenstown]	-0.8142	0.568	-1.434	0.152	-1.927	0.299
embark_town[T.Southampton]	-0.4937	0.267	-1.850	0.064	-1.017	0.029
C(pclass)[T.2]	-1.1446	0.291	-3.938	0.000	-1.714	-0.575
C(pclass)[T.3]	-2.4096	0.291	-8.275	0.000	-2.980	-1.839
age	-0.0361	0.008	-4.677	0.000	-0.051	-0.021

	predicted	se	ci_lower	ci_upper
0	0.441399	0.054207	0.339307	0.548697
1	0.149196	0.030443	0.098769	0.219110
2	0.379875	0.059188	0.272391	0.500592

Dep. Variable:	bestanden	No. Observations:	184
Model:	Logit	Df Residuals:	182
Method:	MLE	Df Model:	1
Date:	Tue, 03 Feb 2026	Pseudo R-squ.:	0.1364
Time:	10:19:41	Log-Likelihood:	-88.399
converged:	True	LL-Null:	-102.36
Covariance Type:	nonrobust	LLR p-value:	1.267e-07

	Tag (im März)	Anzahl
0	3	38
1	4	52
2	5	109
3	6	185
4	7	150

	bestanden	ksp
0	0	6.0
1	1	6.0
2	0	3.0
3	1	6.0
4	0	6.0

	coef	std err	z	P>\|z\|	[0.025	0.975]
Intercept	-1.1447	0.470	-2.434	0.015	-2.066	-0.223
ksp	0.5044	0.100	5.024	0.000	0.308	0.701