import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
sns.set_theme()

df = sns.load_dataset("penguins") 
gA = df[df.species=='Adelie'].bill_length_mm
gG = df[df.species=='Gentoo'].bill_length_mm
gC = df[df.species=='Chinstrap'].bill_length_mm

res = stats.f_oneway(gA.dropna(), gG.dropna(), gC.dropna())
res

F_onewayResult(statistic=410.6002550405077, pvalue=2.6946137388895484e-91)

r1 = stats.ttest_ind(gA.dropna(), gG.dropna())
r1

TtestResult(statistic=-25.09530115900974, pvalue=9.324042980315958e-73, df=272.0)

r2 = stats.ttest_ind(gA.dropna(), gC.dropna())
r2

TtestResult(statistic=-23.801939237440887, pvalue=2.011759018655462e-62, df=217.0)

r3 = stats.ttest_ind(gG.dropna(), gC.dropna())
r3

TtestResult(statistic=-2.7694045269151144, pvalue=0.006175813141889592, df=189.0)

from statsmodels.sandbox.stats.multicomp import MultiComparison

df_dropped = df[df.bill_length_mm.notnull()]
df_dropped.head()

muc = MultiComparison(df_dropped.bill_length_mm, df_dropped.species)

muc.allpairtest(stats.ttest_ind, alpha=0.01, method='bonferroni')[0]

muc.allpairtest(stats.ttest_ind, alpha=0.01, method='holm')[0]

sns.displot(df, x='bill_length_mm', col='species');

df = sns.load_dataset('taxis')

df.head()

df.dropoff_borough.value_counts()

dropoff_borough
Manhattan        5206
Queens            542
Brooklyn          501
Bronx             137
Staten Island       2
Name: count, dtype: int64

m = df[df.dropoff_borough=='Manhattan'].tip
q = df[df.dropoff_borough=='Queens'].tip
b = df[df.dropoff_borough=='Brooklyn'].tip
x = df[df.dropoff_borough=='Bronx'].tip
s = df[df.dropoff_borough=='Staten Island'].tip

stats.f_oneway(m, q, b, x, s)

F_onewayResult(statistic=30.918043439442613, pvalue=1.5552027373832333e-25)

df_dropped = df.dropna()

df_dropped.describe()

muc = MultiComparison(df_dropped.tip, df_dropped.dropoff_borough)

muc.allpairtest(stats.ttest_ind, method='holm')[0]

	species	island	bill_length_mm	bill_depth_mm	flipper_length_mm	body_mass_g	sex
0	Adelie	Torgersen	39.1	18.7	181.0	3750.0	Male
1	Adelie	Torgersen	39.5	17.4	186.0	3800.0	Female
2	Adelie	Torgersen	40.3	18.0	195.0	3250.0	Female
4	Adelie	Torgersen	36.7	19.3	193.0	3450.0	Female
5	Adelie	Torgersen	39.3	20.6	190.0	3650.0	Male

	pickup	dropoff	passengers	distance	fare	tip	total	color	payment	pickup_zone	dropoff_zone	pickup_borough	dropoff_borough
0	2019-03-23 20:21:09	2019-03-23 20:27:24	1	1.60	7.0	2.15	12.95	yellow	credit card	Lenox Hill West	UN/Turtle Bay South	Manhattan	Manhattan
1	2019-03-04 16:11:55	2019-03-04 16:19:00	1	0.79	5.0	0.00	9.30	yellow	cash	Upper West Side South	Upper West Side South	Manhattan	Manhattan
2	2019-03-27 17:53:01	2019-03-27 18:00:25	1	1.37	7.5	2.36	14.16	yellow	credit card	Alphabet City	West Village	Manhattan	Manhattan
3	2019-03-10 01:23:59	2019-03-10 01:49:51	1	7.70	27.0	6.15	36.95	yellow	credit card	Hudson Sq	Yorkville West	Manhattan	Manhattan
4	2019-03-30 13:27:42	2019-03-30 13:37:14	3	2.16	9.0	1.10	13.40	yellow	credit card	Midtown East	Yorkville West	Manhattan	Manhattan

	pickup	dropoff	passengers	distance	fare	tip	tolls	total
count	6341	6341	6341.000000	6341.000000	6341.000000	6341.000000	6341.000000	6341.000000
mean	2019-03-16 08:30:26.574830080	2019-03-16 08:44:47.525784832	1.544078	2.997707	12.887931	1.972703	0.314793	18.310263
min	2019-02-28 23:29:03	2019-02-28 23:32:35	0.000000	0.000000	1.000000	0.000000	0.000000	1.300000
25%	2019-03-08 15:28:20	2019-03-08 15:54:00	1.000000	0.990000	6.500000	0.000000	0.000000	10.800000
50%	2019-03-15 21:57:47	2019-03-15 22:07:48	1.000000	1.650000	9.500000	1.750000	0.000000	14.160000
75%	2019-03-23 17:45:29	2019-03-23 17:57:56	2.000000	3.200000	15.000000	2.820000	0.000000	20.300000
max	2019-03-31 23:43:45	2019-04-01 00:13:58	6.000000	36.700000	150.000000	23.190000	24.020000	174.820000
std	NaN	NaN	1.207948	3.719775	10.722249	2.361897	1.369174	12.950365

Mathematik für Biologiestudierende II¶

Multiples Testen¶

Beispiel Gummibärchen¶

Multiple Vergleiche¶

Bonferroni-Korrektur¶

Bonferroni-Holm-Korrektur¶

False Discovery Rate¶

ANOVA¶

Post-hoc Analyse¶

Dasselbe mit Bonferroni-Holm¶

Bonferroni-Holm¶

Beispiel Taxis¶

group1	group2	stat	pval	pval_corr	reject
Adelie	Chinstrap	-23.8019	0.0	0.0	True
Adelie	Gentoo	-25.0953	0.0	0.0	True
Chinstrap	Gentoo	2.7694	0.0062	0.0185	False

group1	group2	stat	pval	pval_corr	reject
Bronx	Brooklyn	-5.3122	0.0	0.0	True
Bronx	Manhattan	-8.0443	0.0	0.0	True
Bronx	Queens	-5.4336	0.0	0.0	True
Bronx	Staten Island	-10.6288	0.0	0.0	True
Brooklyn	Manhattan	-0.4253	0.6706	0.6706	False
Brooklyn	Queens	-2.307	0.0213	0.0425	True
Brooklyn	Staten Island	-5.9661	0.0	0.0	True
Manhattan	Queens	-4.3851	0.0	0.0	True
Manhattan	Staten Island	-8.4057	0.0	0.0	True
Queens	Staten Island	-4.2013	0.0	0.0001	True