import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
sns.set_theme()

# Load the blood-pressure data set; the path is relative to the current
# working directory.
df = pd.read_csv(filepath_or_buffer="blutdruckdaten.csv")
df.head(n=5)

# Scatter plot of Blutdruck against Alter with a fitted regression line.
# The trailing semicolon keeps a notebook from echoing the returned Axes.
sns.regplot(data=df, x='Alter', y='Blutdruck');

import statsmodels.formula.api as smf

# Ordinary least squares with Blutdruck as the response and Alter as the
# only regressor, written as an R-style formula.
formel = 'Blutdruck ~ Alter'

modell = smf.ols(formula=formel, data=df)

# Estimate the coefficients, then show the full regression table.
res = modell.fit()

res.summary()

# Slope of the regression line by hand: m = cov / var_x.
# NOTE(review): cov and var_x are presumably the sample covariance of
# (Alter, Blutdruck) and the sample variance of Alter, typed in as rounded
# constants rather than computed from df - confirm against the data.
cov = 348.57
var_x = 231.13
m = cov / var_x
np.round(m, decimals=4)
# -> 1.5081

# Intercept by hand: the fitted line passes through the point (xq, yq).
xq = 44.800  # mean of x
yq = 147.93  # mean of y
b = yq - xq * m
np.round(b, decimals=4)
# -> 80.3666

res.summary()

# Control experiment: regress Blutdruck on standard-normal noise that is
# generated independently of the data.  No random_state is passed, so the
# numbers differ from run to run.
P = stats.norm()
# Draw exactly one value per row.  A hard-coded sample size only works
# while the CSV happens to have that many rows; any other length makes the
# column assignment fail with a length-mismatch error.
df['Zufallsdaten'] = P.rvs(size=len(df))

formel2 = 'Blutdruck ~ Zufallsdaten'
modell2 = smf.ols(formel2, df)

res2 = modell2.fit()
res2.summary()

# Same kind of plot as for Alter, now with the noise column on the x axis.
sns.regplot(df, x='Zufallsdaten', y='Blutdruck');

# Regression table of the fit stored in res.
res.summary()

# Confidence intervals for the coefficients at significance level
# alpha = 0.01, i.e. 99 % intervals.
res.conf_int(alpha=0.01)

# NOTE(review): presumably the correlation coefficient of Alter and
# Blutdruck, typed in by hand - confirm against the data.
r = 0.83705

# Square of r; if r is that correlation, this is the coefficient of
# determination.
pow(r, 2)
# -> 0.7006527024999999

# Last look at the current regression table before df and res are rebound
# to the Galton data below.
res.summary()

# Load the Galton data set; the path is relative to the current working
# directory.
df = pd.read_csv(filepath_or_buffer='galton.csv')
df.head(n=5)

# Model childHeight with mother as the only regressor.
formel = 'childHeight ~ mother'

modell = smf.ols(formula=formel, data=df)

res = modell.fit()
res.summary()

# NOTE(review): the plot puts 'father' on the x axis while the model above
# regresses on 'mother' - confirm which parent is intended.
sns.regplot(data=df, x='father', y='childHeight')

<Axes: xlabel='father', ylabel='childHeight'>

# Mathematik für Biologiestudierende II

## Lineare Modelle

	family	father	mother	midparentHeight	children	childNum	gender	childHeight
0	001	78.5	67.0	75.43	4	1	male	73.2
1	002	75.5	66.5	73.66	4	1	male	73.5
2	002	75.5	66.5	73.66	4	2	male	72.5
3	003	75.0	64.0	72.06	2	1	male	71.0
4	004	75.0	64.0	72.06	5	1	male	70.5

	0	1
Intercept	56.058844	104.680629
Alter	0.993358	2.022874