import numpy as np
np.set_printoptions(legacy='1.21')
import pandas as pd
from scipy import stats
import seaborn as sns
sns.set_theme()
import statsmodels.formula.api as smf

# Rat example: load the measurements (columns Alter, Belastung, Gelände).
df = pd.read_csv('ratten.csv')

# Scatter plot of Belastung against Alter with one regression line per
# Gelände category.
sns.lmplot(data=df, x='Alter', y='Belastung', hue='Gelände');

# Linear model with a numeric predictor (Alter) and a categorical predictor
# (Gelände); the formula interface encodes the category automatically.
formel = 'Belastung ~ Alter + Gelände'
modell = smf.ols(formula=formel, data=df)
res = modell.fit()

res.summary()

# Shift the age variable by 8 so that Altersdifferenz == 0 corresponds to an
# age of 8, then fit the same model with the shifted predictor.
df['Altersdifferenz'] = df['Alter'].sub(8)
df.head()

formel2 = 'Belastung ~ Altersdifferenz + Gelände'
modell2 = smf.ols(formula=formel2, data=df)
res2 = modell2.fit()

res2.summary()

# Lemon example: load the data set from the course server (replaces the
# rat data previously bound to df).
df = pd.read_csv("http://reh.math.uni-duesseldorf.de/~braun/bio2425/zitronen.csv")

df.head()

# Number of observations per country.
df['Land'].value_counts()

Land
Spanien         8
Italien         8
Griechenland    8
Marokko         8
Indien          8
Name: count, dtype: int64

import statsmodels.stats.anova as smf_anova

# One-way ANOVA expressed as a linear model: Vitamin_C_Gehalt explained by
# the categorical variable Land.
formel = "Vitamin_C_Gehalt ~ Land"
modell = smf.ols(formel, df)
res = modell.fit()

# Build the ANOVA table once and reuse it, instead of calling anova_lm a
# second time just to keep a reference to the same table.
tabelle = smf_anova.anova_lm(res)
tabelle

# p-value of the factor Land; .loc addresses row and column labels in one
# step and does not depend on 'Land' being usable as an attribute name.
tabelle.loc['Land', 'PR(>F)']

3.3733416696759452e-06

from statsmodels.sandbox.stats.multicomp import MultiComparison

# Pairwise comparison of the vitamin C content between all countries.
muc = MultiComparison(data=df['Vitamin_C_Gehalt'], groups=df['Land'])

# Two-sample t-test for every pair of groups, with Holm correction of the
# p-values; the first element of the result is the summary table.
res = muc.allpairtest(testfunc=stats.ttest_ind, method='holm')
res[0]

# Covid example: case counts per day in March.
df = pd.read_csv('corona.csv')
df.head()

# Scatter plot of the raw counts; the returned Axes is kept in ax because
# the same figure is extended and re-displayed further down.
ax = sns.scatterplot(x="Tag (im März)", y="Anzahl", data=df);

# Switch to a semi-logarithmic view and show the figure again.
ax.set_yscale('log')
ax.figure

# Natural logarithm of the counts: the response of the log-linear model.
df['logAnzahl'] = np.log(df['Anzahl'])
# Alias for the day column under a plain identifier; presumably needed
# because the original name contains spaces and parentheses, which the
# formula syntax would not accept as a variable name.
df['Tag'] = df['Tag (im März)']

# Ordinary least squares on the log scale.
formel = 'logAnzahl ~ Tag'
modell = smf.ols(formula=formel, data=df)
res = modell.fit()

res.summary()

# Evaluate the fitted regression line on the days 3..23.
# The coefficients are taken from the fit rather than typed in by hand from
# res.summary(); they are rounded to the three decimals shown in the summary
# so the line is the same one as with the former literals 0.226 and 3.441.
tage = np.arange(3, 24)
steigung = round(res.params['Tag'], 3)
achsenabschnitt = round(res.params['Intercept'], 3)
gerade = steigung*tage + achsenabschnitt

titel = "Die logarithmierten Daten zusammen mit ihrer Regressionsgerade"
ax2 = sns.scatterplot(x=df.Tag, y=df.logAnzahl)
# Draw the line on the same Axes explicitly instead of relying on the
# implicit "current axes".
sns.lineplot(x=tage, y=gerade, ax=ax2)
ax2.set_title(titel);

# Add the back-transformed (exponentiated) regression line to the earlier
# scatter plot of the raw counts, which at this point uses a log y-axis.
titel = "Die exponentierte Regressionskurve zusammen mit den Ausgangsdaten in halblogarithmischer Darstellung"
sns.lineplot(ax=ax, x=tage, y=np.exp(gerade))
ax.set_title(titel)
ax.figure

# Same figure again, now with a linear y-axis.
titel = "Die exponentierte Regressionskurve zusammen mit den Ausgangsdaten in linearer Darstellung"
ax.set_yscale('linear')
ax.set_title(titel)
ax.figure

# Growth rate per day on the log scale: the slope of the log-linear fit.
# Read from the model instead of repeating the literal 0.226; rounded to the
# three decimals shown in res.summary() so the result below is unchanged.
m = round(res.params['Tag'], 3)

# Doubling time in days: exp(m*t) == 2  <=>  t == ln(2) / m.
t = np.log(2) / m
t

3.067022922831616

Dep. Variable:	Belastung	R-squared:	0.673
Model:	OLS	Adj. R-squared:	0.635
Method:	Least Squares	F-statistic:	17.52
Date:	Sun, 06 Jul 2025	Prob (F-statistic):	7.42e-05
Time:	17:01:42	Log-Likelihood:	-63.935
No. Observations:	20	AIC:	133.9
Df Residuals:	17	BIC:	136.9
Df Model:	2
Covariance Type:	nonrobust

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	39.1728	5.166	7.583	0.000	28.273	50.072
Gelände[T.unkontaminiert]	-11.0980	3.124	-3.552	0.002	-17.689	-4.507
Alter	3.5490	0.617	5.752	0.000	2.247	4.851

Omnibus:	1.119	Durbin-Watson:	2.547
Prob(Omnibus):	0.572	Jarque-Bera (JB):	0.408
Skew:	-0.346	Prob(JB):	0.815
Kurtosis:	3.102	Cond. No.	33.2

	Alter	Belastung	Gelände	Altersdifferenz
0	10	63	unkontaminiert	2
1	12	67	unkontaminiert	4
2	6	55	unkontaminiert	-2
3	6	42	unkontaminiert	-2
4	11	73	unkontaminiert	3

Dep. Variable:	Belastung	R-squared:	0.673
Model:	OLS	Adj. R-squared:	0.635
Method:	Least Squares	F-statistic:	17.52
Date:	Sun, 06 Jul 2025	Prob (F-statistic):	7.42e-05
Time:	17:01:42	Log-Likelihood:	-63.935
No. Observations:	20	AIC:	133.9
Df Residuals:	17	BIC:	136.9
Df Model:	2
Covariance Type:	nonrobust

Mathematik für Biologiestudierende II

Klausur

Termine

Themen

Lineare Modelle mit kategoriellen Daten

Beispiel Ratten

Bestimmung des p-Werts

ANOVA als Lineares Modell

Regression im exponentiellen Modell

Beispiel Covid-Erkrankungen

Halblogarithmische Darstellung

Exponentielles Modell vs. Lineare Regression

Regression im exponentiellen Modell

Halbwerts- bzw. Verdoppelungszeit

	Vitamin_C_Gehalt	Land
0	494.5	Spanien
1	499.2	Spanien
2	494.3	Spanien
3	478.0	Spanien
4	500.1	Spanien

	df	sum_sq	mean_sq	F	PR(>F)
Land	4.0	4378.44650	1094.611625	11.873758	0.000003
Residual	35.0	3226.56125	92.187464	NaN	NaN

group1	group2	stat	pval	pval_corr	reject
Griechenland	Indien	-4.9524	0.0002	0.0019	True
Griechenland	Italien	1.113	0.2845	0.5689	False
Griechenland	Marokko	-3.5339	0.0033	0.0231	True
Griechenland	Spanien	-1.9478	0.0718	0.2153	False
Indien	Italien	6.2008	0.0	0.0002	True
Indien	Marokko	0.3183	0.7549	0.7549	False
Indien	Spanien	3.3226	0.005	0.0302	True
Italien	Marokko	-4.3312	0.0007	0.0055	True
Italien	Spanien	-3.3042	0.0052	0.0302	True
Marokko	Spanien	2.2786	0.0389	0.1556	False

Dep. Variable:	logAnzahl	R-squared:	0.960
Model:	OLS	Adj. R-squared:	0.958
Method:	Least Squares	F-statistic:	458.1
Date:	Sun, 06 Jul 2025	Prob (F-statistic):	9.25e-15
Time:	17:01:43	Log-Likelihood:	-2.9636
No. Observations:	21	AIC:	9.927
Df Residuals:	19	BIC:	12.02
Df Model:	1
Covariance Type:	nonrobust

Omnibus:	0.968	Durbin-Watson:	1.597
Prob(Omnibus):	0.616	Jarque-Bera (JB):	0.816
Skew:	-0.438	Prob(JB):	0.665
Kurtosis:	2.594	Cond. No.	34.1

	Tag (im März)	Anzahl
0	3	38
1	4	52
2	5	109
3	6	185
4	7	150

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	3.4410	0.151	22.728	0.000	3.124	3.758
Tag	0.2260	0.011	21.403	0.000	0.204	0.248