import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
# Apply seaborn's default plot theme to all figures drawn below.
sns.set_theme()

# Exact binomial test for 35 successes in 50 trials, using scipy's defaults
# (null hypothesis p = 0.5, two-sided alternative).
stats.binomtest(k=35, n=50)

BinomTestResult(k=35, n=50, alternative='two-sided', statistic=0.7, pvalue=0.006600447966810918)

# Remote CSV with the pollutant ("Schadstoffe") measurements used in the ANOVA example.
u_schad = "https://www.math.uni-duesseldorf.de/~braun/bio2324/data/schadstoffe.csv"
# The first CSV column becomes the row index instead of a data column.
df = pd.read_csv(u_schad, index_col=0)
# Notebook display of the loaded table.
df

# Number of measurements recorded at each measuring site ("Messstelle").
df["Messstelle"].value_counts()

Messstelle
4    19
1    17
3    16
5    14
2    14
Name: count, dtype: int64

# Concentration readings taken at measuring site 1.
g1 = df.loc[df["Messstelle"] == 1, "Konzentration"]
g1

2     0.000589
3     0.000950
13    0.001301
14    0.001605
18    0.000927
22    0.001250
28    0.000965
33    0.000669
41    0.000712
42    0.001019
45    0.000780
54    0.001306
61    0.001006
64    0.001057
65    0.000381
70    0.000919
74    0.001323
Name: Konzentration, dtype: float64

# Concentration readings of the remaining measuring sites 2 to 5.
g2, g3, g4, g5 = (
    df.loc[df["Messstelle"] == site, "Konzentration"] for site in (2, 3, 4, 5)
)

# One-way ANOVA across the five measuring sites; the result is displayed.
res = stats.f_oneway(g1, g2, g3, g4, g5)
res

F_onewayResult(statistic=0.8666121588849811, pvalue=0.48807057520065544)

# Under the null hypothesis the ANOVA statistic follows an F distribution
# with (k - 1, N - k) degrees of freedom: k = 5 sites, N = 80 measurements.
P = stats.f(5 - 1, 80 - 5)
# Upper-tail probability of the observed statistic, i.e. the p-value.
# sf(x) equals 1 - cdf(x) but avoids the cancellation error of the explicit
# subtraction when the p-value is small.
P.sf(res.statistic)

0.48807057520065544

# p-value reported by f_oneway itself, for comparison with the manual computation above.
res.pvalue

0.48807057520065544

# Replace the pollutant table with seaborn's "penguins" example dataset
# and show its first rows.
df = sns.load_dataset(name="penguins")
df.head()

# Number of observed animals per penguin species.
df["species"].value_counts()

species
Adelie       152
Gentoo       124
Chinstrap     68
Name: count, dtype: int64

# Bill lengths (mm) of the Adelie penguins; missing values are still included.
gA = df.loc[df["species"] == "Adelie", "bill_length_mm"]
gA

0      39.1
1      39.5
2      40.3
3       NaN
4      36.7
       ... 
147    36.6
148    36.0
149    37.8
150    36.0
151    41.5
Name: bill_length_mm, Length: 152, dtype: float64

# Bill lengths (mm) of the two other species, Gentoo and Chinstrap.
gG, gC = (
    df.loc[df["species"] == name, "bill_length_mm"]
    for name in ("Gentoo", "Chinstrap")
)

# The samples still contain missing values (NaN), so f_oneway returns nan
# for both the statistic and the p-value.
stats.f_oneway(gA, gG, gC)

F_onewayResult(statistic=nan, pvalue=nan)

# Remove the missing bill lengths first; with NaN in the samples f_oneway
# only returns nan.
res = stats.f_oneway(gA.dropna(), gG.dropna(), gC.dropna())
# Display the result, as is done for the pollutant ANOVA; without this line
# the outcome of the test is computed but never shown.
res

# Reload the pollutant data, since df was overwritten by the penguins dataset.
# index_col=0 turns the CSV's row-number column into the index, as in the
# first load, so describe() does not summarise a spurious "Unnamed: 0" column.
df = pd.read_csv(u_schad, index_col=0)
df.describe()

# Sample standard deviation (ddof=1, the pandas default) at measuring site 1.
g1.std(ddof=1)

0.0003088278193577403

# Sample standard deviation (ddof=1, the pandas default) at measuring site 2.
g2.std(ddof=1)

0.0004360906113112883

# Sample standard deviation (ddof=1, the pandas default) at measuring site 3.
g3.std(ddof=1)

0.00033459177573784817

# Sample standard deviation (ddof=1, the pandas default) at measuring site 4.
g4.std(ddof=1)

0.00032047637643428304

# Sample standard deviation (ddof=1, the pandas default) at measuring site 5.
g5.std(ddof=1)

0.0003095504974203532

# Concentrations per measuring site, one colour per site. The trailing
# semicolon keeps the notebook from printing the returned object.
sns.scatterplot(
    data=df,
    x='Messstelle',
    y='Konzentration',
    hue='Messstelle',
    legend=False,
);

# Switch back to the penguins dataset and show its summary statistics.
df = sns.load_dataset(name="penguins")
df.describe()

# Sample standard deviation (ddof=1, the pandas default) of the Adelie bill lengths.
gA.std(ddof=1)

2.663404848368619

# Sample standard deviation (ddof=1, the pandas default) of the Chinstrap bill lengths.
gC.std(ddof=1)

3.3392558959358865

# Sample standard deviation (ddof=1, the pandas default) of the Gentoo bill lengths.
gG.std(ddof=1)

3.0818573721142872

# Bill lengths per species, one colour per species. The trailing semicolon
# keeps the notebook from printing the returned object.
sns.scatterplot(
    data=df,
    x='species',
    y='bill_length_mm',
    hue='species',
    legend=False,
);

	species	island	bill_length_mm	bill_depth_mm	flipper_length_mm	body_mass_g	sex
0	Adelie	Torgersen	39.1	18.7	181.0	3750.0	Male
1	Adelie	Torgersen	39.5	17.4	186.0	3800.0	Female
2	Adelie	Torgersen	40.3	18.0	195.0	3250.0	Female
3	Adelie	Torgersen	NaN	NaN	NaN	NaN	NaN
4	Adelie	Torgersen	36.7	19.3	193.0	3450.0	Female

	Unnamed: 0	Messstelle	Konzentration
count	80.0000	80.000000	80.000000
mean	39.5000	2.987500	0.000905
std	23.2379	1.409675	0.000341
min	0.0000	1.000000	0.000061
25%	19.7500	2.000000	0.000701
50%	39.5000	3.000000	0.000938
75%	59.2500	4.000000	0.001158
max	79.0000	5.000000	0.001605

	bill_length_mm	bill_depth_mm	flipper_length_mm	body_mass_g
count	342.000000	342.000000	342.000000	342.000000
mean	43.921930	17.151170	200.915205	4201.754386
std	5.459584	1.974793	14.061714	801.954536
min	32.100000	13.100000	172.000000	2700.000000
25%	39.225000	15.600000	190.000000	3550.000000
50%	44.450000	17.300000	197.000000	4050.000000
75%	48.500000	18.700000	213.000000	4750.000000
max	59.600000	21.500000	231.000000	6300.000000

Mathematik für Biologiestudierende II

Data Snooping

Szenario 1

Szenario 2

ANOVA

Beispiel Schadstoffkonzentration

Haben unterschiedliche Pinguinarten unterschiedliche Schnabellängen?

Was hat das mit den Varianzen bzw. Stichprobenstreuungen zu tun?

	Messstelle	Konzentration
0	5	0.000867
1	3	0.000490
2	1	0.000589
3	1	0.000950
4	4	0.001152
...	...	...
75	5	0.000918
76	3	0.000528
77	3	0.000961
78	4	0.001272
79	3	0.001012