import numpy as np
import pandas as pd
import seaborn as sns
sns.set_theme()

u = "https://www.math.uni-duesseldorf.de/~braun/bio2324/data/schadstoffe.csv"
df = pd.read_csv(u, index_col=0)
df

import statsmodels.api as sm

pp = sm.ProbPlot(df.Konzentration)
pp.qqplot();

df = pd.read_csv('treatment.csv', index_col=0)

pp = sm.ProbPlot(df.t0)
pp.qqplot();

df = pd.read_csv("galapagos.csv", index_col=0)
df

pp = sm.ProbPlot(df.Area)
pp.qqplot();

from scipy import stats

df = pd.read_csv(u, index_col=0)
df['Referenz'] = 0.08 / 100

res = stats.wilcoxon(df.Konzentration, df.Referenz, alternative="greater")
res

WilcoxonResult(statistic=2169.0, pvalue=0.004229703509534525)

stats.ttest_rel(df.Konzentration, df.Referenz, alternative="greater")

TtestResult(statistic=2.768040010585661, pvalue=0.0035114445640696246, df=79)

res = stats.wilcoxon(df.Konzentration, df.Referenz, alternative="greater", method="approx")
res

WilcoxonResult(statistic=2169.0, pvalue=0.004229703509534525)

res.zstatistic

2.6331616685404655

n = df.Konzentration.count()
n

80

r = abs(res.zstatistic / np.sqrt(n))
r

0.2943964243301625

df = pd.read_csv('treatment.csv', index_col=0)
dfv = df[df.Treatment=='Verum']
dfp = df[df.Treatment=='Placebo']

res = stats.mannwhitneyu(dfv.Difference, dfp.Difference, alternative='greater')
res

MannwhitneyuResult(statistic=416814.5, pvalue=0.008213617991035723)

stats.ttest_ind(dfv.Difference, dfp.Difference, alternative='greater')

TtestResult(statistic=2.314493969317715, pvalue=0.010377396661800722, df=1767.0)

pp = sm.ProbPlot(dfv.Difference)
pp.qqplot();

res2 = stats.mannwhitneyu(dfp.Difference, dfv.Difference, alternative='less')
res2

MannwhitneyuResult(statistic=365519.5, pvalue=0.008213617991035723)

U = res2.statistic
n1 = dfp.Difference.count()
n2 = dfv.Difference.count()

r = 1 - 2*U/(n1*n2)
r

0.06556662499648491

	Species	Area	Elevation	Nearest	Scruz	Adjacent
Baltra	58	25.09	346	0.6	0.6	1.84
Bartolome	31	1.24	109	0.6	26.3	572.33
Caldwell	3	0.21	114	2.8	58.7	0.78
Champion	25	0.10	46	1.9	47.4	0.18
Coamano	2	0.05	77	1.9	1.9	903.82
Daphne.Major	18	0.34	119	8.0	8.0	1.84
Daphne.Minor	24	0.08	93	6.0	12.0	0.34
Darwin	10	2.33	168	34.1	290.2	2.85
Eden	8	0.03	71	0.4	0.4	17.95
Enderby	2	0.18	112	2.6	50.2	0.10
Espanola	97	58.27	198	1.1	88.3	0.57
Fernandina	93	634.49	1494	4.3	95.3	4669.32
Gardner1	58	0.57	49	1.1	93.1	58.27
Gardner2	5	0.78	227	4.6	62.2	0.21
Genovesa	40	17.35	76	47.4	92.2	129.49
Isabela	347	4669.32	1707	0.7	28.1	634.49
Marchena	51	129.49	343	29.1	85.9	59.56
Onslow	2	0.01	25	3.3	45.9	0.10
Pinta	104	59.56	777	29.1	119.6	129.49
Pinzon	108	17.95	458	10.7	10.7	0.03
Las.Plazas	12	0.23	94	0.5	0.6	25.09
Rabida	70	4.89	367	4.4	24.4	572.33
SanCristobal	280	551.62	716	45.2	66.6	0.57
SanSalvador	237	572.33	906	0.2	19.8	4.89
SantaCruz	444	903.82	864	0.6	0.0	0.52
SantaFe	62	24.08	259	16.5	16.5	0.52
SantaMaria	285	170.92	640	2.6	49.2	0.10
Seymour	44	1.84	147	0.6	9.6	25.09
Tortuga	16	1.24	186	6.8	50.9	17.95
Wolf	21	2.85	253	34.1	254.7	2.33

Vergeich	parametrisch	nicht-parametrisch
mit Referenzwert	t-Test für verbundene Stichproben	Wilcoxon-Test
vorher-nachher	t-Test für verbundene Stichproben	Wilcoxon-Test
verschiedene Populationen	t-Test für unverbundene Stichproben	Mann-Whitney-U-Test

Mathematik für Biologiestudierende II¶

Normalverteilungsannahmen¶