import pandas as pd
import numpy as np
from pandas import Series
from pandas import DataFrame
# Sample roster of school members as one record (dict) per person.
# The last two records have no age on purpose, so the frame contains
# missing values to practise on.
school_id_list = [
    {'name': person, 'job': role, 'age': years}
    for person, role, years in (
        ('John', 'teacher', 40),
        ('Nate', 'teacher', 35),
        ('Yuna', 'teacher', 37),
        ('Abraham', 'student', 10),
        ('Brian', 'student', 12),
        ('Janny', 'student', 11),
        ('Nate', 'teacher', None),
        ('John', 'student', None),
    )
]
# Build the frame with an explicit column order; None ages become NaN.
df = pd.DataFrame(data=school_id_list, columns=['name', 'job', 'age'])
df
name job age
0 John teacher 40.0
1 Nate teacher 35.0
2 Yuna teacher 37.0
3 Abraham student 10.0
4 Brian student 12.0
5 Janny student 11.0
6 Nate teacher NaN
7 John student NaN
# Show the number of rows and columns as a (rows, columns) tuple.
df.shape
(8, 3)
# Print a summary of the frame: index range, per-column non-null counts,
# dtypes, and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 8 non-null object
1 job 8 non-null object
2 age 6 non-null float64
dtypes: float64(1), object(2)
memory usage: 320.0+ bytes
# The info() summary reports only 6 non-null age values for 8 rows,
# so 2 ages are NaN.
# isna() returns a same-shaped boolean frame that is True where a value is missing.
df.isna()
name job age
0 False False False
1 False False False
2 False False False
3 False False False
4 False False False
5 False False False
6 False False True
7 False False True
# isnull() gives the same boolean mask as isna(); pandas documents it as an alias.
df.isnull()
name job age
0 False False False
1 False False False
2 False False False
3 False False False
4 False False False
5 False False False
6 False False True
7 False False True
Null 또는 NaN 값을 확인할 수 있다.
# Preview what replacing every missing age with 0 looks like.
# The result is stored in a separate frame instead of being written back to
# df: overwriting df.age here would leave no NaN values for a later fillna
# to replace, and the zeros would skew any age statistics computed afterwards.
df_zero_filled = df.assign(age=df['age'].fillna(0))
df_zero_filled
name job age
0 John teacher 40.0
1 Nate teacher 35.0
2 Yuna teacher 37.0
3 Abraham student 10.0
4 Brian student 12.0
5 Janny student 11.0
6 Nate teacher 0.0
7 John student 0.0
# Fill each missing age with the median age of that person's job group.
# groupby('job')['age'].transform('median') returns a Series aligned to df's
# index that holds every row's group median (NaN is skipped when computing it).
# The result is assigned back rather than calling fillna(inplace=True) on
# df['age']: an in-place call on a selected column is chained assignment,
# which warns on pandas 2.2 and does not update df under Copy-on-Write.
# Only rows whose age is still NaN are changed.
df['age'] = df['age'].fillna(df.groupby('job')['age'].transform('median'))
df
name job age
0 John teacher 40.0
1 Nate teacher 35.0
2 Yuna teacher 37.0
3 Abraham student 10.0
4 Brian student 12.0
5 Janny student 11.0
6 Nate teacher 37.0
7 John student 11.0
👍