import pandas as pd
import numpy as np
16 Aggregate
# Example data: one row per animal (the index label), recording its taxonomic
# class, its order, and its maximum speed. The monkey's speed is missing (NaN).
df = pd.DataFrame([('bird', 'Falconiformes', 389.0),
                   ('bird', 'Psittaciformes', 24.0),
                   ('mammal', 'Carnivora', 80.2),
                   ('mammal', 'Primates', np.nan),
                   ('mammal', 'Carnivora', 58)],
                  index=['falcon', 'parrot', 'lion', 'monkey', 'leopard'],
                  columns=('class', 'order', 'max_speed'))
df
 | class | order | max_speed |
---|---|---|---|
falcon | bird | Falconiformes | 389.0 |
parrot | bird | Psittaciformes | 24.0 |
lion | mammal | Carnivora | 80.2 |
monkey | mammal | Primates | NaN |
leopard | mammal | Carnivora | 58.0 |
16.0.1 groupby()
`groupby()`: group by categorical
# Split the rows of df into groups that share the same value of 'class'.
grouped = df.groupby('class')
# Mapping from each group key to the index labels of the rows in that group.
grouped.groups
{'bird': ['falcon', 'parrot'], 'mammal': ['lion', 'monkey', 'leopard']}
# Group by two columns at once: each distinct ('class', 'order') pair is a group.
grouped2 = df.groupby(['class', 'order'])
# Group keys are now tuples of (class, order).
grouped2.groups
{('bird', 'Falconiformes'): ['falcon'], ('bird', 'Psittaciformes'): ['parrot'], ('mammal', 'Carnivora'): ['lion', 'leopard'], ('mammal', 'Primates'): ['monkey']}
# Number of rows in each group; rows with a missing max_speed are still counted.
grouped.size()
class
bird 2
mammal 3
dtype: int64
# Per-group mean of the numeric columns only. The string column 'order' cannot
# be averaged, so numeric_only is passed explicitly instead of relying on the
# deprecated implicit default.
grouped.mean(numeric_only=True)
/var/folders/rb/99nqfz7s2rb6d_p0d6yxtbxc0000gn/T/ipykernel_40383/2755795945.py:1: FutureWarning:
The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
 | max_speed |
---|---|
class | |
bird | 206.5 |
mammal | 69.1 |
# Same per-class mean of max_speed, computed by selecting the column and
# applying np.mean to each group's values.
df.groupby('class')['max_speed'].apply(np.mean)
class
bird 206.5
mammal 69.1
Name: max_speed, dtype: float64