import numpy as np
np.set_printoptions(legacy='1.21')
from scipy import stats
import pandas as pd
import seaborn as sns
sns.set_theme()

# Read schadstoffe.csv from the URL below, using the first CSV column as the
# row index, and preview the first five rows.
u = "https://www.math.uni-duesseldorf.de/~braun/bio2324/data/schadstoffe.csv"
df = pd.read_csv(filepath_or_buffer=u, index_col=0)
df.head(n=5)

import statsmodels.api as sm

# Q-Q plot of the `Konzentration` column against ProbPlot's default reference
# distribution, as a visual check of the normality assumption.
# The trailing semicolon suppresses the text echo of the returned figure in Jupyter.
pp = sm.ProbPlot(df["Konzentration"])
pp.qqplot();

# Same visual check for the `t0` column of the local treatment.csv file
# (first CSV column used as the row index).
df = pd.read_csv('treatment.csv', index_col=0)

pp = sm.ProbPlot(df["t0"])
pp.qqplot();

# Load galapagos.csv (first CSV column as row index) and preview it.
df = pd.read_csv("galapagos.csv", index_col=0)
df.head(n=5)

# Q-Q plot of the `Area` column.
pp = sm.ProbPlot(df["Area"])
pp.qqplot();

# Reload the schadstoffe data and attach a constant reference column
# (0.08 / 100) so the comparison against a fixed value can be run as a
# paired test.
u = "https://www.math.uni-duesseldorf.de/~braun/bio2324/data/schadstoffe.csv"
df = pd.read_csv(u, index_col=0).assign(Referenz=0.08 / 100)

# One-sided Wilcoxon signed-rank test on the paired differences
# Konzentration - Referenz (alternative: differences tend to be positive).
res = stats.wilcoxon(
    x=df["Konzentration"],
    y=df["Referenz"],
    alternative="greater",
)
res

WilcoxonResult(statistic=2169.0, pvalue=0.004229703509534525)

# Parametric counterpart of the Wilcoxon test above: one-sided t-test for
# related samples, Konzentration against the constant Referenz column.
stats.ttest_rel(df.Konzentration, df.Referenz, alternative="greater")

TtestResult(statistic=2.768040010585661, pvalue=0.0035114445640696246, df=79)

# Repeat the Wilcoxon test with method="approx" requested explicitly; the
# following cell reads `zstatistic` from this result.
res = stats.wilcoxon(
    x=df["Konzentration"],
    y=df["Referenz"],
    alternative="greater",
    method="approx",
)
res

WilcoxonResult(statistic=2169.0, pvalue=0.004229703509534525)

# z statistic of the approximate Wilcoxon test; it feeds the effect size r below.
res.zstatistic

2.6331616685404655

# Sample size: number of non-missing values in the Konzentration column.
n = df["Konzentration"].count()
n

80

# Effect size for the Wilcoxon test: r = |z| / sqrt(n).
r = abs(res.zstatistic) / np.sqrt(n)
r

0.2943964243301625

# Summary statistics of the current frame.
df.describe()

# Standardised distance from the reference value 0.0008 (= 0.08 / 100).
# NOTE(review): 0.000905 and 0.000341 look like the mean and standard
# deviation of Konzentration copied by hand from describe() — confirm
# against that output before reusing this cell with other data.
d = (0.000905-0.0008) / 0.000341
d

0.3079178885630497

# Reload galapagos.csv, this time without promoting the first column to the
# index, and plot the `Species` column against the resulting row index.
df = pd.read_csv("galapagos.csv")
sns.scatterplot(data=df["Species"]);

# Split the rows at 150 species: `reich` ("rich") holds rows with at least
# 150, `arm` ("poor") holds rows with fewer.
reich = df.loc[df["Species"] >= 150]
arm = df.loc[df["Species"] < 150]

# Per-group summary statistics.
reich.describe()

arm.describe()

# One-sided Mann-Whitney U test: is Area in the species-rich group
# stochastically greater than in the species-poor group?
res = stats.mannwhitneyu(
    x=reich["Area"],
    y=arm["Area"],
    alternative='greater',
)
res

MannwhitneyuResult(statistic=122.0, pvalue=0.0005123847398218562)

# U statistic of the Mann-Whitney test above (reported for the first sample,
# `reich`).
U = res.statistic

# Group sizes are taken from the data instead of being typed in by hand, so
# they stay correct if the 150-species threshold or the data set changes.
n1 = len(reich)
n2 = len(arm)

# Rank-biserial-style effect size in the form 1 - 2U/(n1*n2). With the U of
# the *larger* group this comes out negative; the next cell flips the sign.
r = 1 - 2*U/(n1*n2)
r

-0.952

# Same effect size with the sign flipped, so that a larger U for the first
# sample (`reich`) yields a positive r.
r = 2*U/(n1*n2) - 1
r

0.952

# Swapping the samples and the direction of the alternative tests the same
# hypothesis; the reported statistic is now the U of `arm`.
stats.mannwhitneyu(arm.Area, reich.Area, alternative='less')

MannwhitneyuResult(statistic=3.0, pvalue=0.0005123847398218562)

# Effect size 1 - 2U/(n1*n2) using the literal 3, the statistic echoed by the
# swapped Mann-Whitney call directly above.
1 - 2*3/(n1*n2)

0.952

# Rank all rows by Area across both groups (pandas defaults: ascending,
# ties receive the average rank).
df['Rang'] = df["Area"].rank()
df.head()

# Split again so that both groups carry the new `Rang` column.
arm = df.loc[df["Species"] < 150]
reich = df.loc[df["Species"] >= 150]

# Rank sum of the species-rich group.
T1 = reich["Rang"].sum()
T1

137.0

# Rank sum of the species-poor group.
T2 = arm["Rang"].sum()
T2

328.0

# U computed by hand from the rank sum of `reich`:
# n1*n2 + n1*(n1+1)/2 - T1. The value echoed below matches the statistic
# SciPy reported when `arm` was passed as the first sample.
U1 = n1*n2 + n1*(n1+1)/2 - T1
U1

3.0

# U computed by hand from the rank sum of `arm`:
# n1*n2 + n2*(n2+1)/2 - T2. The value echoed below matches the statistic
# SciPy reported when `reich` was passed as the first sample.
U2 = n1*n2 + n2*(n2+1)/2 - T2
U2

122.0

	Species	Area	Elevation	Nearest	Scruz	Adjacent
Baltra	58	25.09	346	0.6	0.6	1.84
Bartolome	31	1.24	109	0.6	26.3	572.33
Caldwell	3	0.21	114	2.8	58.7	0.78
Champion	25	0.10	46	1.9	47.4	0.18
Coamano	2	0.05	77	1.9	1.9	903.82

Vergleich	parametrisch	nicht-parametrisch
mit Referenzwert	t-Test für verbundene Stichproben	Wilcoxon-Test
vorher-nachher	t-Test für verbundene Stichproben	Wilcoxon-Test
verschiedene Populationen	t-Test für unverbundene Stichproben	Mann-Whitney-U-Test

Mathematik für Biologiestudierende

Normalverteilungsannahmen

	Species	Area	Elevation	Nearest	Scruz	Adjacent
count	5.00000	5.000000	5.000000	5.000000	5.000000	5.000000
mean	318.60000	1373.602000	966.600000	9.860000	32.740000	128.114000
std	80.32621	1860.550153	427.743849	19.777462	25.876205	283.079535
min	237.00000	170.920000	640.000000	0.200000	0.000000	0.100000
25%	280.00000	551.620000	716.000000	0.600000	19.800000	0.520000
50%	285.00000	572.330000	864.000000	0.700000	28.100000	0.570000
75%	347.00000	903.820000	906.000000	2.600000	49.200000	4.890000
max	444.00000	4669.320000	1707.000000	45.200000	66.600000	634.490000

	Species	Area	Elevation	Nearest	Scruz	Adjacent
count	25.000000	25.000000	25.000000	25.000000	25.000000	25.000000
mean	38.560000	39.330000	248.320000	10.100000	61.824000	287.695200
std	34.467473	127.392845	307.762046	13.454182	73.035592	940.916069
min	2.000000	0.010000	25.000000	0.400000	0.400000	0.030000
25%	10.000000	0.210000	93.000000	1.100000	10.700000	0.520000
50%	25.000000	1.240000	147.000000	4.300000	47.400000	2.850000
75%	58.000000	17.950000	259.000000	10.700000	88.300000	59.560000
max	108.000000	634.490000	1494.000000	47.400000	290.200000	4669.320000