import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.graphics.mosaicplot import mosaic
import seaborn as sns
# Apply seaborn's default plot theme before any figures are drawn.
sns.set_theme()

# Load seaborn's "titanic" example dataset into a DataFrame.
# NOTE(review): load_dataset presumably fetches the data from seaborn's online
# example repository — confirm network access (or a local cache) when re-running.
df = sns.load_dataset("titanic")
# Preview the first rows to see which columns are available.
df.head()

# Contingency table: passenger class (rows) against survival status (columns).
tafel = pd.crosstab(index=df["pclass"], columns=df["alive"])
tafel

# Mosaic plot of the table. The trailing semicolon is presumably there to keep
# the notebook from echoing the call's return value.
mosaic(tafel.stack());

# Chi-square test of independence between passenger class and survival;
# only the p-value of the result is displayed.
res = stats.chi2_contingency(observed=tafel)
res.pvalue

4.549251711298793e-23

# `class` is a reserved word in Python, so attribute access (`df.class`) does
# not parse; such a column has to be selected with brackets instead.
# pd.crosstab(df.class, df.embark_town)   #  invalid syntax

# Contingency table: ticket class (rows) against port of embarkation (columns).
tafel = pd.crosstab(index=df["class"], columns=df["embark_town"])
tafel

# Mosaic plot of the table built above.
mosaic(tafel.stack());

# 2x2 table of counts: rows are the time of day ('Tag' = day,
# 'Dämmerung' = dusk), columns are the two bee groups
# ('Mauerbienen' = mason bees, 'Holzbienen' = carpenter bees).
tafel = pd.DataFrame(
    {"Mauerbienen": [131, 7], "Holzbienen": [18, 4]},
    index=["Tag", "Dämmerung"],
)
tafel

# Mosaic plot of the 2x2 table.
mosaic(tafel.stack());

# Fisher's exact test on the 2x2 table; the result carries the test
# statistic and the p-value.
stats.fisher_exact(table=tafel)

SignificanceResult(statistic=4.158730158730159, pvalue=0.046533029009005966)

# Goodness-of-fit example: births per weekday.
# 'Tage' is how often each weekday occurs in the year, 'Geburten' is the
# number of births counted on that weekday.
tafel = pd.DataFrame(index=['Mo', 'Di', 'Mi', 'Do', 'Fr', 'Sa', 'So'])
tafel['Tage'] = [52, 52, 52, 52, 53, 52, 52]
tafel['Geburten'] = [41, 63, 63, 47, 56, 47, 33]
tafel

# Total number of births in the sample.
n = tafel.Geburten.sum()
n

350

# Expected births per weekday if births were spread evenly over all days:
# each weekday receives its share of the days. The day total is taken from
# the table itself rather than a hard-coded 365, so the computation stays
# correct if the 'Tage' column changes (e.g. for a leap year).
tafel['erwartet'] = tafel.Tage * n / tafel.Tage.sum()
tafel.round(2)

# Contribution of each weekday to the chi-square statistic:
# (observed - expected)^2 / expected.
tafel['Abweichung'] = (tafel.Geburten - tafel.erwartet)**2 / tafel.erwartet
tafel.round(2)

# Test statistic: sum of the per-weekday contributions.
t = tafel.Abweichung.sum()
t

15.05675927845739

# Reference distribution: chi-square with (number of categories - 1) degrees
# of freedom, derived from the table instead of a hard-coded 6.
P = stats.chi2(len(tafel) - 1)
# p-value = probability of a statistic at least as large as t. sf(t) is the
# survival function, i.e. 1 - cdf(t), evaluated without the cancellation
# error that the explicit subtraction introduces for small p-values.
P.sf(t)

0.01981981977372675

# The same goodness-of-fit test in a single call: observed birth counts
# against the expected counts stored in the table.
res = stats.chisquare(f_obs=tafel["Geburten"], f_exp=tafel["erwartet"])
res

Power_divergenceResult(statistic=15.05675927845739, pvalue=0.01981981977372675)

	survived	pclass	sex	age	sibsp	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True

embark_town	Cherbourg	Queenstown	Southampton
class
First	85	2	127
Second	17	3	164
Third	66	72	353

Zahlencode	Ausprägung	Wahrscheinlichkeit
1	weiß	25%
2	rosa	50%
3	rot	25%

$k_1$	$k_2$	$k_3$	$P(X_1=k_1, X_2=k_2, X_3=k_3) $	kumulierte Summe
$ 0 $	$ 0 $	$ 4 $	$ 0.0039 $	$ 0.0039 $
$ 4 $	$ 0 $	$ 0 $	$ 0.0039 $	$ 0.0078 $
$ 1 $	$ 0 $	$ 3 $	$ 0.0156 $	$ 0.0234 $
$ 3 $	$ 0 $	$ 1 $	$ 0.0156 $	$ 0.0391 $
$ 2 $	$ 0 $	$ 2 $	$ 0.0234 $	$ 0.0625 $
$ 0 $	$ 1 $	$ 3 $	$ 0.0312 $	$ 0.0938 $
$ 3 $	$ 1 $	$ 0 $	$ 0.0312 $	$ 0.1250 $
$ 0 $	$ 4 $	$ 0 $	$ 0.0625 $	$ 0.1875 $
$ 0 $	$ 2 $	$ 2 $	$ 0.0938 $	$ 0.2812 $
$ 1 $	$ 1 $	$ 2 $	$ 0.0938 $	$ 0.3750 $
$ 2 $	$ 1 $	$ 1 $	$ 0.0938 $	$ 0.4688 $
$ 2 $	$ 2 $	$ 0 $	$ 0.0938 $	$ 0.5625 $
$ 0 $	$ 3 $	$ 1 $	$ 0.1250 $	$ 0.6875 $
$ 1 $	$ 3 $	$ 0 $	$ 0.1250 $	$ 0.8125 $
$ 1 $	$ 2 $	$ 1 $	$ 0.1875 $	$ 1.0000 $

Mathematik für Biologiestudierende II

$\chi^2$-Unabhängigkeitstest

Anwendbarkeit

Erstellung der Kontingenztafel aus einer Tabelle

Python: reservierte Worte

Der $\chi^2$-Anpassungstest

Beispiel Geburten pro Wochentag

Anpassungstest: Test auf Übereinstimmung der Daten mit einer Verteilung

Weiter im Beispiel

Ein exakter Permutationstest

Mendelsche Erbregeln als Beispiel

Interpretation als Vergleich zweier Verteilungen

Beispiel Mendel: Tabelle der Wahrscheinlichkeiten der Einzelereignisse

Wochentag	Tage im Jahr	Anzahl Geburten
Montag	52	41
Dienstag	52	63
Mittwoch	52	63
Donnerstag	52	47
Freitag	53	56
Samstag	52	47
Sonntag	52	33

	Tage	Geburten	erwartet
Mo	52	41	49.86
Di	52	63	49.86
Mi	52	63	49.86
Do	52	47	49.86
Fr	53	56	50.82
Sa	52	47	49.86
So	52	33	49.86

	Tage	Geburten	erwartet	Abweichung
Mo	52	41	49.86	1.58
Di	52	63	49.86	3.46
Mi	52	63	49.86	3.46
Do	52	47	49.86	0.16
Fr	53	56	50.82	0.53
Sa	52	47	49.86	0.16
So	52	33	49.86	5.70

alive	no	yes
pclass
1	80	136
2	97	87
3	372	119

	Mauerbienen	Holzbienen
Tag	131	18
Dämmerung	7	4