import pandas as pd

# Load the extended iris dataset into a DataFrame.
# Prefer the local copy; fall back to the course repository on GitHub when the
# local file is missing. Previously both sources were read unconditionally and
# the local result was immediately overwritten, so the script failed unless
# BOTH the local file and network access were available.
# NOTE(review): assumes the local file and the GitHub file hold the same data — confirm.
try:
    iris = pd.read_csv("iris/iris_extended.csv")
except FileNotFoundError:
    iris = pd.read_csv(
        "https://github.com/Ruediger-Braun/mfb2425/raw/refs/heads/master/iris/iris_extended.csv"
    )

# Bare expression: displays the DataFrame when run as a notebook cell.
iris

# Download the fish example dataset (CSV hosted at stats.idre.ucla.edu)
# into a DataFrame.
fish = pd.read_csv("https://stats.idre.ucla.edu/stat/data/fish.csv")
# Bare expression: displays the DataFrame when run as a notebook cell.
fish

# Select the single column 'soil_type'; the result is a Series.
iris['soil_type']

0       sandy
1        clay
2       sandy
3        clay
4       loamy
        ...  
1195    loamy
1196     clay
1197     clay
1198    sandy
1199    loamy
Name: soil_type, Length: 1200, dtype: object

# Petal-length column of the iris data as a Series
# ("Bluetenblattlaenge" is German for petal length).
bluetenblattlaenge = iris['petal_length']

# Bare expression: displays the Series when run as a notebook cell.
bluetenblattlaenge

0       1.64
1       1.53
2       1.47
3       1.53
4       1.23
        ... 
1195    5.16
1196    7.68
1197    4.72
1198    5.30
1199    5.64
Name: petal_length, Length: 1200, dtype: float64

# Label-based lookup: the row whose index label is 1100, returned as a Series.
iris.loc[1100]

species                            virginica
elevation                              108.4
soil_type                              loamy
sepal_length                            6.34
sepal_width                             3.09
petal_length                             5.7
petal_width                             2.17
sepal_area                           19.5906
petal_area                            12.369
sepal_aspect_ratio                   2.05178
petal_aspect_ratio                  2.626728
sepal_to_petal_length_ratio         1.112281
sepal_to_petal_width_ratio          1.423963
sepal_petal_length_diff                 0.64
sepal_petal_width_diff                  0.92
petal_curvature_mm                      9.46
petal_texture_trichomes_per_mm2         11.5
leaf_area_cm2                          73.17
sepal_area_sqrt                     4.426127
petal_area_sqrt                     3.516959
area_ratios                         1.583847
Name: 1100, dtype: object

# Small example sample: five mouse weights in a single column 'Gewicht'
# (German for weight), built in one step from a dict.
maeuse = pd.DataFrame({'Gewicht': [21.3, 19.8, 20.4, 19.0, 22.7]})

# Bare expression: displays the DataFrame when run as a notebook cell.
maeuse

# Arithmetic mean of each column.
maeuse.mean()

Gewicht    20.64
dtype: float64

# Arithmetic mean of the petal lengths (a NumPy float scalar).
bluetenblattlaenge.mean()

np.float64(3.80795)

import numpy as np
# Switch NumPy to its legacy 1.21 print mode so that scalars are displayed as
# plain numbers (3.80795) instead of with the type wrapper (np.float64(3.80795)).
np.set_printoptions(legacy='1.21')

# Same computation as before the print-option change; only the display differs.
bluetenblattlaenge.mean()

3.80795

# Median of each column of the five-value sample.
maeuse.median()

Gewicht    20.4
dtype: float64

# Second sample: the same five weights as in maeuse plus one extra value, 287.
maeuse2 = pd.DataFrame({'Gewicht': [21.3, 19.8, 20.4, 19.0, 22.7, 287]})

# Mean of the five-value sample maeuse.
maeuse.mean()

Gewicht    20.64
dtype: float64

# Mean of maeuse2, i.e. the same weights plus the extra value 287.
maeuse2.mean()

Gewicht    65.033333
dtype: float64

# Median of the five-value sample maeuse.
maeuse.median()

Gewicht    20.4
dtype: float64

# Median of maeuse2 (six values, so the two middle values are averaged).
maeuse2.median()

Gewicht    20.85
dtype: float64

maeuse.var()  # empirical (sample) variance

Gewicht    2.033
dtype: float64

maeuse.std()  # sample standard deviation

Gewicht    1.425833
dtype: float64

# Default ddof=1: the sum of squared deviations is divided by n - 1.
maeuse.var()

Gewicht    2.033
dtype: float64

# ddof=0: divide by n instead of n - 1.
maeuse.var(ddof=0)

Gewicht    1.6264
dtype: float64

# Summary statistics (count, mean, std, min, quartiles, max) of the mouse weights.
maeuse.describe()

# The same summary for the numeric columns of the fish dataset.
fish.describe()

# Keep the petal-length summary so individual entries can be read out by label.
bbl = bluetenblattlaenge.describe()
bbl

count    1200.000000
mean        3.807950
std         1.765469
min         0.960000
25%         1.640000
50%         4.300000
75%         5.200000
max         7.840000
Name: petal_length, dtype: float64

bbl["75%"] - bbl["25%"]  # interquartile range (3rd quartile minus 1st quartile)

3.5600000000000005

# Round to 6 decimals to hide floating-point noise (3.5600000000000005 -> 3.56).
(bbl["75%"] - bbl["25%"]).round(6)

3.56

import seaborn as sns

# Box-and-whisker plot of sepal length, one box per species.
# The trailing semicolon suppresses the returned object's repr in a notebook.
sns.boxplot(data=iris, x="species", y="sepal_length");

# Box plot of the mouse weights including the extra value 287.
sns.boxplot(data=maeuse2);

	species	elevation	soil_type	sepal_length	sepal_width	petal_length	petal_width	sepal_area	petal_area	sepal_aspect_ratio	...	sepal_to_petal_length_ratio	sepal_to_petal_width_ratio	sepal_petal_length_diff	sepal_petal_width_diff	petal_curvature_mm	petal_texture_trichomes_per_mm2	leaf_area_cm2	sepal_area_sqrt	petal_area_sqrt	area_ratios
0	setosa	161.8	sandy	5.16	3.41	1.64	0.26	17.5956	0.4264	1.513196	...	3.146341	13.115385	3.52	3.15	5.33	18.33	53.21	4.194711	0.652993	41.265478
1	setosa	291.4	clay	5.48	4.05	1.53	0.37	22.1940	0.5661	1.353086	...	3.581699	10.945946	3.95	3.68	5.90	20.45	52.53	4.711051	0.752396	39.205087
2	setosa	144.3	sandy	5.10	2.80	1.47	0.38	14.2800	0.5586	1.821429	...	3.469388	7.368421	3.63	2.42	5.66	24.62	50.25	3.778889	0.747395	25.563910
3	setosa	114.6	clay	4.64	3.44	1.53	0.17	15.9616	0.2601	1.348837	...	3.032680	20.235294	3.11	3.27	4.51	22.91	50.85	3.995197	0.510000	61.367166
4	setosa	110.9	loamy	4.85	2.87	1.23	0.26	13.9195	0.3198	1.689895	...	3.943089	11.038462	3.62	2.61	4.03	21.56	40.57	3.730885	0.565509	43.525641
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1195	virginica	268.8	loamy	5.36	2.51	5.16	1.93	13.4536	9.9588	2.135458	...	1.038760	1.300518	0.20	0.58	11.61	11.52	59.46	3.667915	3.155757	1.350926
1196	virginica	125.4	clay	7.49	3.06	7.68	2.17	22.9194	16.6656	2.447712	...	0.975260	1.410138	-0.19	0.89	13.85	6.99	77.12	4.787421	4.082352	1.375252
1197	virginica	73.6	clay	6.79	3.25	4.72	2.26	22.0675	10.6672	2.089231	...	1.438559	1.438053	2.07	0.99	13.13	9.16	74.39	4.697606	3.266068	2.068725
1198	virginica	239.6	sandy	6.38	2.24	5.30	1.71	14.2912	9.0630	2.848214	...	1.203774	1.309942	1.08	0.53	11.01	6.46	73.90	3.780370	3.010482	1.576873
1199	virginica	201.5	loamy	5.16	3.20	5.64	1.43	16.5120	8.0652	1.612500	...	0.914894	2.237762	-0.48	1.77	12.09	8.59	64.31	4.063496	2.839930	2.047314

	nofish	livebait	camper	persons	child	xb	zg	count
0	1	0	0	1	0	-0.896315	3.050405	0
1	0	1	1	1	0	-0.558345	1.746149	0
2	0	1	0	1	0	-0.401731	0.279939	0
3	0	1	1	2	1	-0.956298	-0.601526	0
4	0	1	0	1	0	0.436891	0.527709	1
...	...	...	...	...	...	...	...	...
245	1	1	1	2	0	-0.755236	2.324209	0
246	0	1	1	4	3	1.794859	-5.625944	0
247	0	1	1	2	1	-0.392649	0.677275	0
248	1	1	1	3	2	1.374641	-2.595630	0
249	1	1	1	2	1	0.828834	-1.457115	0

	nofish	livebait	camper	persons	child	xb	zg	count
count	250.000000	250.000000	250.000000	250.00000	250.000000	250.000000	250.000000	250.000000
mean	0.296000	0.864000	0.588000	2.52800	0.684000	0.973796	0.252323	3.296000
std	0.457407	0.343476	0.493182	1.11273	0.850315	1.440277	2.102391	11.635028
min	0.000000	0.000000	0.000000	1.00000	0.000000	-3.275050	-5.625944	0.000000
25%	0.000000	1.000000	0.000000	2.00000	0.000000	0.008267	-1.252724	0.000000
50%	0.000000	1.000000	1.000000	2.00000	0.000000	0.954550	0.605079	0.000000
75%	1.000000	1.000000	1.000000	4.00000	1.000000	1.963855	1.993237	2.000000
max	1.000000	1.000000	1.000000	4.00000	3.000000	5.352674	4.263185	149.000000

Quartil	Quantil
1. Quartil	25%-Quantil
Median	50%-Quantil
3. Quartil	75%-Quantil

Mathematik für Biologiestudierende

Python

Installation auf eigenem Rechner

Warum nicht Excel?

Einlesen von Daten

Deskriptive Statistik

Lageparameter

Arithmetisches Mittel

Beispiel

Median

Median für geraden Stichprobenumfang

Einkommensverteilung

Robustheit

Beispiel zur Robustheit

Streuungsparameter

Empirische Varianz und Stichprobenstreuung

Konkrete Rechnung

Stichprobenstreuung vs. Varianz

Formeln für die Varianz

Warum $n - 1$ im Nenner?

Zusammenfassungen

Quartile

Beispielgrafik

Zusammenhang zwischen Quartilen und Quantilen

Interquartilabstand

Box-Whisker-Plots

	Gewicht
count	5.000000
mean	20.640000
std	1.425833
min	19.000000
25%	19.800000
50%	20.400000
75%	21.300000
max	22.700000

	Gewicht
0	21.3
1	19.8
2	20.4
3	19.0
4	22.7