import pandas as pd
import re
13 Data I/O
13.1 Read CSV
# Load the iris CSV into a DataFrame, then display it.
iris = pd.read_csv('~/icloud/Data/iris.csv')
iris
 | Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
13.2 Specify data types at read time
# Re-read the CSV, declaring the Species column as a categorical dtype at
# parse time instead of converting it afterwards.
iris = pd.read_csv('~/icloud/Data/iris.csv',
                   dtype={'Species': 'category'})
iris.shape
(150, 5)
13.3 Drop duplicates
# Remove fully duplicated rows (first occurrence is kept), then check the
# new row count.
iris = iris.drop_duplicates()
iris.shape
(149, 5)
13.4 Clean column names
# Replace every literal dot in the column names with an underscore so the
# names are valid Python identifiers (e.g. Sepal.Length -> Sepal_Length).
# The pattern is a raw string so the backslash reaches the regex engine
# without triggering an invalid-escape warning.
iris.columns = [re.sub(r'\.', '_', col) for col in iris.columns]
iris.columns
Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',
'Species'],
dtype='object')
13.5 Write CSV
# Write the cleaned DataFrame to CSV. With pandas defaults the row index is
# written as the first column; pass index=False to omit it.
iris.to_csv('~/icloud/Data/irisp.csv')
13.6 Write parquet
# Write the cleaned DataFrame to a Parquet file.
# NOTE: pandas needs a Parquet engine (pyarrow or fastparquet) installed.
iris.to_parquet('~/icloud/Data/irisp.parquet')