이론

#1 
  A	B
0	1	0
1	2	2
2	0	3
3	4	4

#2
x
return column.split(".")[0]으로 해야 함

#3
O

#4
x
df.job.unique 뒤에 괄호 ()가 없어 메서드가 호출되지 않는다.

#5
x
append 함수는 한 번에 하나의 데이터프레임만 추가 가능

실습

#1
import pandas as pd
import numpy as np

# Small frame with exactly one missing value in each column.
data = {
    'A': [1, 2, np.nan, 4],
    'B': [np.nan, 2, 3, 4],
}
df = pd.DataFrame(data)
# Replace every NaN with the placeholder string. The result is reassigned
# instead of using inplace=True: filling a float column with a string in
# place is a deprecated silent upcast in recent pandas, whereas the
# non-inplace call simply returns object-dtype columns.
df = df.fillna("unknown")
df

#2
import pandas as pd
import numpy as np

# Three date strings in 'yyyy.mm.dd' form, one per row.
data_list = [
    {'yyyy.mm.dd': '2000.06.28'},
    {'yyyy.mm.dd': '2003.03.28'},
    {'yyyy.mm.dd': '2006.08.25'},
]
df = pd.DataFrame(data_list)


def extract_year(column):
    """Return the second dot-separated field of ``column``.

    NOTE: despite the name, this yields the month part of a
    'yyyy.mm.dd' string (index 1), which is what the 'month' column needs.
    """
    parts = column.split(".")
    return parts[1]


# Apply the extractor to each date string individually.
df['month'] = df['yyyy.mm.dd'].map(extract_year)
df

#3
import pandas as pd
import numpy as np

data = [
    {'학번': '20-123456', '하츄핑': 'A', '과목1': 59.745, '과목2': 33.678},
    {'학번': '22-135791', '차차핑': 'B', '과목1': 26.234, '과목2': 89.123},
    {'학번': '24-246802', '바로핑': 'A', '과목1': 79.521, '과목2': 11.937}
]
df = pd.DataFrame(data)


def extract_year(column):
    """Return the part of ``column`` that precedes the first '-'."""
    prefix = column.split('-')[0]
    return prefix


# Keep only the leading prefix of each student id.
df['학번'] = df['학번'].map(extract_year)

# 1 when any cell in the row equals 'A', otherwise 2.
df['이름'] = [1 if 'A' in row else 2 for row in df.to_numpy()]

# Round both subject scores to one decimal, then average the rounded values.
score_cols = ['과목1', '과목2']
df[score_cols] = df[score_cols].round(1)
df['평균 점수'] = df[score_cols].mean(axis=1).round(1)

# Drop the per-name columns and fix the final column order.
df = df.loc[:, ['학번', '이름', '과목1', '과목2', '평균 점수']]
df

#4
import pandas as pd

df_list = [
    {'name': 'Alice', 'job': 'Teacher'},
    {'name': 'Bob', 'job': 'Doctor'},
    {'name': 'Charlie', 'job': 'Artist'},
    {'name': 'David', 'job': 'Teacher'},
    {'name': 'Eve', 'job': 'Scientist'},
    {'name': 'Frank', 'job': 'Engineer'},
    {'name': 'Grace', 'job': 'Artist'},
    {'name': 'Hannah', 'job': 'Teacher'}
]
df = pd.DataFrame(df_list, columns=['name', 'job'])
# Distinct job titles.
df_jobb = df.job.unique()
# Count rows per job. The index and value column are named explicitly so the
# frame always has 'job' and 'count' columns, instead of relying on the
# default names that value_counts()/reset_index() produce (these differ
# between pandas versions).
job_counts = df['job'].value_counts().rename_axis('job').reset_index(name='count')
# Share of all rows held by each job, as a percentage with one decimal.
job_counts['percentage'] = (job_counts['count'] / len(df) * 100).round(1)
df_job = job_counts[['job', 'percentage']]
print(df_job)

#5
names_A = ["학생1", "학생2", "학생3"]
scores_A = [85, 90, 88]
names_B = ["학생4", "학생5", "학생6"]
scores_B = [78, 82, 80]

# One frame per class, each with a 'names' and a 'scores' column.
df1 = pd.DataFrame(list(zip(names_A, scores_A)), columns=['names', 'scores'])
df2 = pd.DataFrame(list(zip(names_B, scores_B)), columns=['names', 'scores'])

# Stack both classes into a single frame with a fresh index.
df = pd.concat([df1, df2], ignore_index=True)
mean_s = df['scores'].mean()


def diff(x):
    """Return the signed distance of score ``x`` from the overall mean ``mean_s``."""
    return x - mean_s


# Positive values are above the mean, negative values below it.
df['score_diff'] = df['scores'].map(diff)
df