import numpy as np
# Switch NumPy's printing to its legacy '1.21' mode.
# NOTE(review): presumably chosen so printed results match the recorded outputs — confirm.
np.set_printoptions(legacy='1.21')
import seaborn as sns
# Activate seaborn's theme and select the 'talk' plotting context for all figures.
sns.set_theme()
sns.set_context('talk')
import pandas as pd
from scipy import stats

# Read the lemon data set from the course server into a DataFrame.
zitronen = pd.read_csv(
    "http://reh.math.uni-duesseldorf.de/~braun/bio2425/zitronen.csv"
)

# Preview the first rows.
zitronen.head()

# Count the observations for each value of the Land column.
zitronen["Land"].value_counts()

Land
Spanien         8
Italien         8
Griechenland    8
Marokko         8
Indien          8
Name: count, dtype: int64

# Stacked histogram of Vitamin_C_Gehalt, coloured by Land.
sns.displot(data=zitronen, x='Vitamin_C_Gehalt', hue='Land', multiple='stack');

# One Series of Vitamin_C_Gehalt values per country.
spanien = zitronen.loc[zitronen['Land'] == 'Spanien', 'Vitamin_C_Gehalt']
italien = zitronen.loc[zitronen['Land'] == 'Italien', 'Vitamin_C_Gehalt']
griechenland = zitronen.loc[zitronen['Land'] == 'Griechenland', 'Vitamin_C_Gehalt']
marokko = zitronen.loc[zitronen['Land'] == 'Marokko', 'Vitamin_C_Gehalt']
indien = zitronen.loc[zitronen['Land'] == 'Indien', 'Vitamin_C_Gehalt']

# One-way ANOVA across the five country groups.
stats.f_oneway(spanien, italien, griechenland, marokko, indien)

F_onewayResult(statistic=11.873757820341998, pvalue=3.3733416696757513e-06)

from statsmodels.sandbox.stats.multicomp import MultiComparison

# Pair the measured values with their group labels for all-pairs testing.
muc = MultiComparison(zitronen['Vitamin_C_Gehalt'], zitronen['Land'])

# Pairwise t-tests with Bonferroni-corrected p-values; res[0] is the summary table.
res = muc.allpairtest(stats.ttest_ind, method='bonferroni')
res[0]

# The same pairwise t-tests with the Holm correction; `res` is overwritten.
res = muc.allpairtest(stats.ttest_ind, method='holm')
res[0]

# res[1] carries the unrounded numbers behind the table.
res[1]

(array([[-4.95235073e+00,  2.12516086e-04],
        [ 1.11298427e+00,  2.84460819e-01],
        [-3.53387275e+00,  3.30511610e-03],
        [-1.94781061e+00,  7.17760991e-02],
        [ 6.20082296e+00,  2.31091193e-05],
        [ 3.18345880e-01,  7.54921930e-01],
        [ 3.32261512e+00,  5.03074269e-03],
        [-4.33123161e+00,  6.90566981e-04],
        [-3.30423221e+00,  5.21815835e-03],
        [ 2.27858634e+00,  3.88961763e-02]]),
 array([ True, False,  True, False,  True, False,  True,  True,  True,
        False]),
 array([1.91264477e-03, 5.68921637e-01, 2.31358127e-02, 2.15328297e-01,
        2.31091193e-04, 7.54921930e-01, 3.01844561e-02, 5.52453585e-03,
        3.01844561e-02, 1.55584705e-01]),
 0.005116196891823743,
 0.005)

# Index 2 of res[1] is the array of corrected p-values, one entry per group pair.
p_werte_korrigiert = res[1][2]
p_werte_korrigiert

array([1.91264477e-03, 5.68921637e-01, 2.31358127e-02, 2.15328297e-01,
       2.31091193e-04, 7.54921930e-01, 3.01844561e-02, 5.52453585e-03,
       3.01844561e-02, 1.55584705e-01])

# Run the one-way ANOVA on the five country groups again and show only its p-value.
stats.f_oneway(spanien, italien, griechenland, marokko, indien).pvalue

3.3733416696757513e-06

# Holm-corrected pairwise t-tests once more, now at significance level 5e-6
# (slightly above the ANOVA p-value printed above).
res = muc.allpairtest(stats.ttest_ind, method='holm', alpha=5e-6)

res[0]

# Load seaborn's 'penguins' example data set.
pingus = sns.load_dataset('penguins')
pingus.head()

# Keep only the rows whose body_mass_g is not missing.
pingus_mit_gewichtsangabe = pingus.loc[pingus['body_mass_g'].notna()]

pingus_mit_gewichtsangabe.describe()

# For comparison: drop every row that has a missing value in any column.
pingus.dropna().describe()

# Number of rows per species.
pingus['species'].value_counts()

species
Adelie       152
Gentoo       124
Chinstrap     68
Name: count, dtype: int64

# Body mass per species, with missing values removed.
gA = pingus.loc[pingus['species'] == 'Adelie', 'body_mass_g'].dropna()
gC = pingus.loc[pingus['species'] == 'Chinstrap', 'body_mass_g'].dropna()
gG = pingus.loc[pingus['species'] == 'Gentoo', 'body_mass_g'].dropna()

# One-way ANOVA across the three species.
stats.f_oneway(gA, gC, gG)

F_onewayResult(statistic=343.626275205481, pvalue=2.892368133377283e-82)

# Pairwise comparison of body mass, built from the frame without missing body masses.
# NOTE(review): the ANOVA above compares species, but the groups here are the
# islands — confirm that grouping by island is intended.
muc = MultiComparison(
    pingus_mit_gewichtsangabe['body_mass_g'],
    pingus_mit_gewichtsangabe['island'],
)

# Pairwise t-tests with Bonferroni-corrected p-values.
res = muc.allpairtest(stats.ttest_ind, method='bonferroni')

res[0]

# Read the pollutant measurements; the first CSV column becomes the index.
u_schad = "https://www.math.uni-duesseldorf.de/~braun/bio2324/data/schadstoffe.csv"
schadstoffe = pd.read_csv(u_schad, index_col=0)
schadstoffe.head()

# Konzentration values split by measuring station (Messstelle 1 to 5).
g1, g2, g3, g4, g5 = (
    schadstoffe.loc[schadstoffe['Messstelle'] == nr, 'Konzentration']
    for nr in range(1, 6)
)

# Standard deviation of all concentrations pooled together.
schadstoffe['Konzentration'].std()

0.00034083567416156316

# Standard deviation within each measuring station (g1..g5), one notebook cell each,
# for comparison with the pooled standard deviation computed above.
g1.std()

0.0003088278193577403

g2.std()

0.0004360906113112883

g3.std()

0.00033459177573784817

g4.std()

0.00032047637643428304

g5.std()

0.0003095504974203532

# Box plot of the concentration for each measuring station.
ax = sns.boxplot(data=schadstoffe, x="Messstelle", y="Konzentration");

# Draw the box plot of all concentrations together onto the same axes and show the figure.
sns.boxplot(data=schadstoffe, y="Konzentration", ax=ax)
ax.figure

# Standard deviation of body mass over all penguins with a recorded mass ...
pingus_mit_gewichtsangabe.body_mass_g.std()

801.9545356980955

# ... and within each species (Adelie, Chinstrap, Gentoo), one notebook cell each.
gA.std()

458.5661259101348

gC.std()

384.3350813871914

gG.std()

504.11623665709163

# Box plot of body mass for each species.
ax = sns.boxplot(data=pingus_mit_gewichtsangabe, x='species', y='body_mass_g');

# Draw the box plot of all body masses together onto the same axes and show the figure.
sns.boxplot(data=pingus_mit_gewichtsangabe, y="body_mass_g", ax=ax)
ax.figure

# Three samples of 20 values each, used as the example for the Levene test.
la = [
    42, 115, 73, 58, 64, 100, 112, 112, 52, 70,
    152, 215, 176, 97, 145, 100, 82, 94, 165, 97,
]

lb = [
    165, 165, 152, 173, 225, 173, 136, 145, 188, 155,
    197, 233, 236, 264, 200, 252, 258, 212, 145, 273,
]

lc = [
    236, 185, 339, 255, 264, 325, 267, 215, 233, 295,
    255, 264, 224, 245, 248, 309, 264, 273, 294, 230,
]

# Levene test on the three samples.
stats.levene(la, lb, lc)

LeveneResult(statistic=0.6457341109631508, pvalue=0.5280694573759905)

# One-way ANOVA on the same three samples la, lb, lc.
stats.f_oneway(la, lb, lc)

F_onewayResult(statistic=67.41573785674238, pvalue=9.5327270117002e-16)

# Read the perch data from a local CSV file.
barsche = pd.read_csv('barsche.csv')
barsche.head()

# Box plot of Länge for each Art.
sns.boxplot(data=barsche, y="Länge", x="Art");

# Länge values per Art.
ds = barsche.loc[barsche['Art'] == 'gestreift', 'Länge']
dl = barsche.loc[barsche['Art'] == 'gefleckt', 'Länge']
db = barsche.loc[barsche['Art'] == 'blau', 'Länge']
dr = barsche.loc[barsche['Art'] == 'braun', 'Länge']

# Levene test on the four groups.
stats.levene(ds, dl, dr, db)

LeveneResult(statistic=13.459492972830807, pvalue=1.3472893996510424e-07)

# Alexander-Govern test on the four perch length samples
# (the Levene test above gave p ≈ 1.3e-07 for them).
stats.alexandergovern(ds, dl, dr, db)

AlexanderGovernResult(statistic=113.40810114676775, pvalue=2.02668339537414e-24)

# The same test on the three samples la, lb, lc ...
stats.alexandergovern(la, lb, lc)

AlexanderGovernResult(statistic=56.82538049315829, pvalue=4.576415099851209e-13)

# ... and, for comparison, the one-way ANOVA on those three samples.
stats.f_oneway(la, lb, lc)

F_onewayResult(statistic=67.41573785674238, pvalue=9.5327270117002e-16)

	species	island	bill_length_mm	bill_depth_mm	flipper_length_mm	body_mass_g	sex
0	Adelie	Torgersen	39.1	18.7	181.0	3750.0	Male
1	Adelie	Torgersen	39.5	17.4	186.0	3800.0	Female
2	Adelie	Torgersen	40.3	18.0	195.0	3250.0	Female
3	Adelie	Torgersen	NaN	NaN	NaN	NaN	NaN
4	Adelie	Torgersen	36.7	19.3	193.0	3450.0	Female

	bill_length_mm	bill_depth_mm	flipper_length_mm	body_mass_g
count	342.000000	342.000000	342.000000	342.000000
mean	43.921930	17.151170	200.915205	4201.754386
std	5.459584	1.974793	14.061714	801.954536
min	32.100000	13.100000	172.000000	2700.000000
25%	39.225000	15.600000	190.000000	3550.000000
50%	44.450000	17.300000	197.000000	4050.000000
75%	48.500000	18.700000	213.000000	4750.000000
max	59.600000	21.500000	231.000000	6300.000000

	bill_length_mm	bill_depth_mm	flipper_length_mm	body_mass_g
count	333.000000	333.000000	333.000000	333.000000
mean	43.992793	17.164865	200.966967	4207.057057
std	5.468668	1.969235	14.015765	805.215802
min	32.100000	13.100000	172.000000	2700.000000
25%	39.500000	15.600000	190.000000	3550.000000
50%	44.500000	17.300000	197.000000	4050.000000
75%	48.600000	18.700000	213.000000	4775.000000
max	59.600000	21.500000	231.000000	6300.000000

group1	group2	stat	pval	pval_corr	reject
Biscoe	Dream	12.9663	0.0	0.0	True
Biscoe	Torgersen	8.7781	0.0	0.0	True
Dream	Torgersen	0.0924	0.9265	1.0	False

	Messstelle	Konzentration
0	5	0.000867
1	3	0.000490
2	1	0.000589
3	1	0.000950
4	4	0.001152

Mathematik für Biologiestudierende

Wiederholung (interaktiv)

Themen heute

Multiples Testen

Beispiel Gummibärchen

ANOVA

Post-hoc Analyse

Beispiel Zitronen

Paarvergleiche

Ablesung genauerer Werte

Behandlung von NaN

Was hat ANOVA mit Varianzen bzw. Stichprobenstreuungen zu tun?

Jetzt dasselbe mit den Pinguinen

Heteroskedastizität

Der Levene-Test

Beispiel

Beispiel: Barsche

Probleme beim Test auf Heteroskedastizität

Alexander-Govern-Test

	Vitamin_C_Gehalt	Land
0	494.5	Spanien
1	499.2	Spanien
2	494.3	Spanien
3	478.0	Spanien
4	500.1	Spanien

group1	group2	stat	pval	pval_corr	reject
Griechenland	Indien	-4.9524	0.0002	0.0021	True
Griechenland	Italien	1.113	0.2845	1.0	False
Griechenland	Marokko	-3.5339	0.0033	0.0331	True
Griechenland	Spanien	-1.9478	0.0718	0.7178	False
Indien	Italien	6.2008	0.0	0.0002	True
Indien	Marokko	0.3183	0.7549	1.0	False
Indien	Spanien	3.3226	0.005	0.0503	False
Italien	Marokko	-4.3312	0.0007	0.0069	True
Italien	Spanien	-3.3042	0.0052	0.0522	False
Marokko	Spanien	2.2786	0.0389	0.389	False

	Art	Länge
0	gestreift	9.890006
1	gestreift	9.343944
2	gestreift	9.867069
3	gestreift	10.302781
4	gestreift	10.066964