이론
1.
   A  B
0  1  0
1  2  2
2  0  3
3  4  4
2. x
3. x
4. x
5. x
#실습
#1.
import pandas as pd
import numpy as np
# Two numeric columns, each containing exactly one missing (NaN) entry.
data = dict(
    A=[1, 2, np.nan, 4],
    B=[np.nan, 2, 3, 4],
)

# Build the frame and replace every missing cell with the string 'unknown'
# in a single chained expression.
df = pd.DataFrame(data).fillna('unknown')
#2.
import pandas as pd
import numpy as np
# One record per date string, formatted as 'yyyy.mm.dd'.
data_list = [
    {'yyyy.mm.dd': stamp}
    for stamp in ('2000.06.28', '2003.03.28', '2006.08.25')
]
df = pd.DataFrame(data_list)

# Split each date on '.' and keep the middle piece (the month) as a string.
date_parts = df['yyyy.mm.dd'].str.split('.')
df['month'] = date_parts.str[1]

print(df)
#3.
import pandas as pd
import numpy as np
# Raw student records: a hyphenated student id plus two subject scores.
# NOTE(review): each record also carries a key that differs per row
# ('하츄핑', '차차핑', '바로핑'); those become sparse columns that the final
# column selection below drops. Confirm that this is intended.
data = [
    {'학번': '20-123456', '하츄핑': 'A', '과목1': 59.745, '과목2': 33.678},
    {'학번': '22-135791', '차차핑': 'B', '과목1': 26.234, '과목2': 89.123},
    {'학번': '24-246802', '바로핑': 'A', '과목1': 79.521, '과목2': 11.937},
]
df = pd.DataFrame(data)

# Keep only the part of the student id that precedes the hyphen.
df['학번'] = df['학번'].str.split('-').str[0]

# The name column is filled with a 1-based row number.
# NOTE(review): presumably an anonymisation step -- confirm against the task.
df['이름'] = df.index + 1

# Compute the average from the *unrounded* scores first. Averaging values
# that were already rounded to one decimal rounds twice and can shift the
# result by 0.1 (e.g. a true mean of 57.6785 would be reported as 57.6).
df['평균 점수'] = ((df['과목1'] + df['과목2']) / 2).round(1)

# Only now round the individual subject scores for display.
df['과목1'] = df['과목1'].round(1)
df['과목2'] = df['과목2'].round(1)

# Fix the output column order (this also drops the per-row extra columns).
df = df[['학번', '이름', '과목1', '과목2', '평균 점수']]
print(df)
#4.
import pandas as pd
# (name, job) pairs expanded into one record per person.
df_list = [
    {'name': person, 'job': occupation}
    for person, occupation in (
        ('Alice', 'Teacher'),
        ('Bob', 'Doctor'),
        ('Charlie', 'Artist'),
        ('David', 'Teacher'),
        ('Eve', 'Scientist'),
        ('Frank', 'Engineer'),
        ('Grace', 'Artist'),
        ('Hannah', 'Teacher'),
    )
]
df = pd.DataFrame(df_list, columns=['name', 'job'])

# Occurrences of each job, and the total number of counted rows.
job_counts = df['job'].value_counts()
total_count = job_counts.sum()

# Turn the counts into each job's share of the total, in percent, and
# expose the result as a two-column frame: 'job' and 'percentage'.
job_share = (job_counts / total_count) * 100
df_job = job_share.rename('percentage').rename_axis('job').reset_index()

print(df_job)
#5.
import pandas as pd
# Class A: student names and their scores.
names_A = ["학생1", "학생2", "학생3"]
scores_A = [85, 90, 88]
df_A = pd.DataFrame({'이름': names_A, '점수': scores_A})

# Class B: student names and their scores.
names_B = ["학생4", "학생5", "학생6"]
scores_B = [78, 82, 80]
df_B = pd.DataFrame({'이름': names_B, '점수': scores_B})

# Stack both classes into one frame with a fresh 0..n-1 index.
df = pd.concat([df_A, df_B], ignore_index=True)

# Mean score over the students of both classes combined.
overall_mean = df['점수'].mean()


def diff(score):
    """Return *score* minus the module-level ``overall_mean``.

    Works for a single number as well as for a whole pandas Series, in
    which case the subtraction is applied element-wise.
    """
    return score - overall_mean


# Deviation of each score from the overall mean; the whole column is passed
# at once so the subtraction is vectorised instead of applied per element.
df['편차'] = diff(df['점수'])

# Repeat the overall mean on every row for reference.
df['전체 평균'] = overall_mean
print(df)