import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.formula.api as smf
import seaborn as sns
sns.set_theme()
import warnings
warnings.filterwarnings('ignore', message='The figure layout has changed')

# Load the rat data set; the model below uses its columns
# 'Alter', 'Belastung' and 'Gelände'.
df = pd.read_csv('ratten.csv')

# Scatter plot of 'Belastung' against 'Alter', coloured by 'Gelände',
# with a regression line drawn for each 'Gelände' group.
sns.lmplot(data=df, x='Alter', y='Belastung', hue='Gelände');

# Additive linear model (no interaction term): 'Belastung' is explained
# by 'Alter' plus 'Gelände'.
formel = 'Belastung ~ Alter + Gelände'
modell = smf.ols(formula=formel, data=df)
res = modell.fit()

# Regression table; shown when evaluated as the last expression of a
# notebook cell.
res.summary()

# Query points for the fitted model: 'Alter' 8 and 9, each combined with
# both 'Gelände' levels.
anfrage = pd.DataFrame({
    'Alter': [8, 8, 9, 9],
    'Gelände': ['kontaminiert', 'unkontaminiert', 'kontaminiert', 'unkontaminiert'],
})
anfrage

# Predictions for the query points as a table (one row per query row).
res.get_prediction(anfrage).summary_frame()

# Shift the age so that 'Altersdifferenz' == 0 corresponds to 'Alter' == 8;
# the intercept of the model below then refers to that age.
df['Altersdifferenz'] = df['Alter'] - 8
df.head()

# Same additive model as before, but with the shifted age as regressor.
formel2 = 'Belastung ~ Altersdifferenz + Gelände'
modell2 = smf.ols(formula=formel2, data=df)
res2 = modell2.fit()

# Regression table of the shifted-age model.
res2.summary()

# Replace the working frame with the case-count data; the columns used
# below are "Tag (im März)" and "Anzahl".
df = pd.read_csv('corona.csv')
df.head()

# Raw counts against the day; keep the Axes so later cells can draw on it.
ax = sns.scatterplot(x="Tag (im März)", y="Anzahl", data=df);

# Switch the y axis to a logarithmic scale and show the same figure again.
ax.set(yscale='log')
ax.figure

# Natural logarithm of the counts: the response of the linear fit below.
df['logAnzahl'] = np.log(df['Anzahl'])
# Copy the day column under a plain identifier so it can be used directly
# in the formula string (the original name contains spaces and parentheses).
df['Tag'] = df['Tag (im März)']

# Straight-line fit of the log counts against the day.
formel = 'logAnzahl ~ Tag'
modell = smf.ols(formula=formel, data=df)
res = modell.fit()

# Regression table of the log-linear fit.
res.summary()

# Evaluate the fitted regression line on the days 3..23.
# Slope and intercept are read from the fit result `res` instead of being
# copied (rounded) from the summary table, so the line stays consistent
# with the model if the data change.
tage = np.arange(3, 24)
gerade = res.params['Tag']*tage + res.params['Intercept']

# Log-transformed data together with the regression line.  The line is
# drawn explicitly on the Axes returned by the scatter plot rather than on
# whatever happens to be the current Axes.
titel = "Die logarithmierten Daten zusammen mit ihrer Regressionsgerade"
ax2 = sns.scatterplot(x=df.Tag, y=df.logAnzahl)
sns.lineplot(x=tage, y=gerade, ax=ax2)
ax2.set_title(titel);

# Back-transform the line with exp() and add it to the earlier scatter
# plot of the raw counts (`ax`).
titel = "Die exponentierte Regressionskurve zusammen mit den Ausgangsdaten in halblogarithmischer Darstellung"
sns.lineplot(x=tage, y=np.exp(gerade), ax=ax)
ax.set_title(titel)
ax.figure

# Same figure again, now with a linear y axis.
titel = "Die exponentierte Regressionskurve zusammen mit den Ausgangsdaten in linearer Darstellung"
ax.set_title(titel)
ax.set_yscale('linear')
ax.figure

# Growth rate per day: the fitted slope of logAnzahl against Tag, taken
# from the fit result instead of a rounded value copied from the summary.
m = res.params['Tag']

# Doubling time of exp(m * Tag): solve exp(m * t) == 2 for t.
t = np.log(2) / m
t

3.067022922831616

Dep. Variable:	Belastung	R-squared:	0.673
Model:	OLS	Adj. R-squared:	0.635
Method:	Least Squares	F-statistic:	17.52
Date:	Tue, 21 Jan 2025	Prob (F-statistic):	7.42e-05
Time:	10:57:26	Log-Likelihood:	-63.935
No. Observations:	20	AIC:	133.9
Df Residuals:	17	BIC:	136.9
Df Model:	2
Covariance Type:	nonrobust

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	39.1728	5.166	7.583	0.000	28.273	50.072
Gelände[T.unkontaminiert]	-11.0980	3.124	-3.552	0.002	-17.689	-4.507
Alter	3.5490	0.617	5.752	0.000	2.247	4.851

Omnibus:	1.119	Durbin-Watson:	2.547
Prob(Omnibus):	0.572	Jarque-Bera (JB):	0.408
Skew:	-0.346	Prob(JB):	0.815
Kurtosis:	3.102	Cond. No.	33.2

	mean	mean_se	mean_ci_lower	mean_ci_upper	obs_ci_lower	obs_ci_upper
0	67.564695	2.037898	63.265107	71.864283	53.358146	81.771244
1	56.466728	2.284487	51.646882	61.286575	42.094168	70.839289
2	71.113678	2.182221	66.509596	75.717761	56.812030	85.415327
3	60.015712	2.074920	55.638014	64.393409	45.785328	74.246095

Dep. Variable:	Belastung	R-squared:	0.673
Model:	OLS	Adj. R-squared:	0.635
Method:	Least Squares	F-statistic:	17.52
Date:	Tue, 21 Jan 2025	Prob (F-statistic):	7.42e-05
Time:	10:57:26	Log-Likelihood:	-63.935
No. Observations:	20	AIC:	133.9
Df Residuals:	17	BIC:	136.9
Df Model:	2
Covariance Type:	nonrobust

Mathematik für Biologiestudierende II

Klausur

Themen heute

Lineare Modelle mit kategoriellen Daten

Bestimmung des p-Werts

Regression im exponentiellen Modell

Beispiel Covid-Erkrankungen

Halblogarithmische Darstellung

Exponentielles Modell vs. Lineare Regression

Regression im exponentiellen Modell

Halbwerts- bzw. Verdoppelungszeit

	Alter	Gelände
0	8	kontaminiert
1	8	unkontaminiert
2	9	kontaminiert
3	9	unkontaminiert

Dep. Variable:	logAnzahl	R-squared:	0.960
Model:	OLS	Adj. R-squared:	0.958
Method:	Least Squares	F-statistic:	458.1
Date:	Tue, 21 Jan 2025	Prob (F-statistic):	9.25e-15
Time:	10:57:26	Log-Likelihood:	-2.9636
No. Observations:	21	AIC:	9.927
Df Residuals:	19	BIC:	12.02
Df Model:	1
Covariance Type:	nonrobust

Omnibus:	0.968	Durbin-Watson:	1.597
Prob(Omnibus):	0.616	Jarque-Bera (JB):	0.816
Skew:	-0.438	Prob(JB):	0.665
Kurtosis:	2.594	Cond. No.	34.1

	Tag (im März)	Anzahl
0	3	38
1	4	52
2	5	109
3	6	185
4	7	150

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	3.4410	0.151	22.728	0.000	3.124	3.758
Tag	0.2260	0.011	21.403	0.000	0.204	0.248