# 이론
# 1.
#    A  B
# 0  1  0
# 1  2  2
# 2  0  3
# 3  4  4

# 2. x
# 3. x
# 4. x
# 5. x
#실습
#1.
import pandas as pd
import numpy as np

# Two columns, each containing exactly one missing value (NaN).
data = dict(
    A=[1, 2, np.nan, 4],
    B=[np.nan, 2, 3, 4],
)

# Build the frame and substitute the placeholder text for every NaN cell.
df = pd.DataFrame(data).fillna(value='unknown')

#2.
import pandas as pd
import numpy as np

# One record per row; the single field holds a dot-separated date string.
data_list = [
    {'yyyy.mm.dd': '2000.06.28'},
    {'yyyy.mm.dd': '2003.03.28'},
    {'yyyy.mm.dd': '2006.08.25'},
]
df = pd.DataFrame(data_list)

# Split every date on '.' and keep the middle piece (the month), using the
# vectorised string accessor instead of a per-row lambda.
date_parts = df['yyyy.mm.dd'].str.split('.')
df['month'] = date_parts.str[1]
print(df)

#3.
import pandas as pd
import numpy as np

# Raw student records.
# NOTE(review): each record has its own extra key ('하츄핑', '차차핑', '바로핑')
# rather than a shared name field; those keys become sparse columns that the
# final column selection below discards. Confirm this matches the exercise.
data = [
    {'학번': '20-123456', '하츄핑': 'A', '과목1': 59.745, '과목2': 33.678},
    {'학번': '22-135791', '차차핑': 'B', '과목1': 26.234, '과목2': 89.123},
    {'학번': '24-246802', '바로핑': 'A', '과목1': 79.521, '과목2': 11.937}
]
df = pd.DataFrame(data)

# Keep only the part of the student ID that precedes the hyphen.
df['학번'] = df['학번'].str.split('-').str[0]
# The name column is filled with a 1-based row number.
# NOTE(review): presumably intentional (names replaced by a sequence number);
# verify against the exercise statement.
df['이름'] = df.index + 1

# Compute the average from the UNROUNDED scores first. Averaging values that
# were already rounded to one decimal rounds twice and can shift the result
# (e.g. (26.2 + 89.1) / 2 = 57.65 -> 57.6, while the true mean 57.6785 -> 57.7).
df['평균 점수'] = ((df['과목1'] + df['과목2']) / 2).round(1)

# Only now round the individual subject scores for display.
df['과목1'] = df['과목1'].round(1)
df['과목2'] = df['과목2'].round(1)

# Fix the output column order; this also drops the sparse per-student columns.
df = df[['학번', '이름', '과목1', '과목2', '평균 점수']]

print(df)

#4.
import pandas as pd

# Name/occupation records used as the input table.
df_list = [
    {'name': 'Alice', 'job': 'Teacher'},
    {'name': 'Bob', 'job': 'Doctor'},
    {'name': 'Charlie', 'job': 'Artist'},
    {'name': 'David', 'job': 'Teacher'},
    {'name': 'Eve', 'job': 'Scientist'},
    {'name': 'Frank', 'job': 'Engineer'},
    {'name': 'Grace', 'job': 'Artist'},
    {'name': 'Hannah', 'job': 'Teacher'},
]
df = pd.DataFrame.from_records(df_list, columns=['name', 'job'])

# Occurrences of each job, most frequent first.
job_counts = df['job'].value_counts()
total_count = job_counts.sum()

# Turn the counts into a percentage of all rows and flatten the result into a
# two-column frame; naming the axis and the value column explicitly keeps the
# column labels independent of what value_counts() calls them.
df_job = (
    (job_counts / total_count * 100)
    .rename_axis('job')
    .reset_index(name='percentage')
)

print(df_job)

#5.
import pandas as pd

# Class A: student names paired with their scores.
names_A = ["학생1", "학생2", "학생3"]
scores_A = [85, 90, 88]
df_A = pd.DataFrame(list(zip(names_A, scores_A)), columns=['이름', '점수'])

# Class B: same layout as class A.
names_B = ["학생4", "학생5", "학생6"]
scores_B = [78, 82, 80]
df_B = pd.DataFrame(list(zip(names_B, scores_B)), columns=['이름', '점수'])

# Stack both classes and renumber the rows 0..n-1.
df = pd.concat((df_A, df_B)).reset_index(drop=True)

# Mean score over the students of both classes combined.
overall_mean = df['점수'].mean()


def diff(score):
    """Return the difference between *score* and the combined mean score."""
    return score - overall_mean


# Add the deviation column first, then the (constant) overall-mean column;
# diff() is applied to the whole score column at once.
df = df.assign(**{'편차': diff(df['점수']), '전체 평균': overall_mean})

print(df)