#python #pandas #dataframe
#python #панды #фрейм данных
Вопрос:
Я пытаюсь заполнить нулевое значение job_industry_category
из фрейма данных поиска. Например:
df = pd.DataFrame()
df['job_title'] = ['Executive Secretary', 'Administrative Officer' , 'Recruiting Manager' , 'Senior Editor', 'Media Manager I']
df['job_industry_category'] = ['Health', 'Financial Services' , 'Property', NaN, NaN]
df
job_title job_industry_category
0 Executive Secretary Health
1 Administrative Officer Financial Services
2 Recruiting Manager Property
3 Senior Editor NaN
4 Media Manager I NaN
lookup = pd.DataFrame()
lookup['job_title'] = ['Executive Secretary', 'Senior Editor', 'Media Manager I']
lookup['job_industry_category'] = ['Retail', 'Manufacturing', 'Health']
lookup
job_title job_industry_category
0 Executive Secretary Health
1 Senior Editor Manufacturing
2 Media Manager I Health
И результат, который я ожидаю, будет:
df
job_title job_industry_category
0 Executive Secretary Health
1 Administrative Officer Financial Services
2 Recruiting Manager Property
3 Senior Editor Manufacturing
4 Media Manager I Health
Я пытался использовать map
, как это:
df.loc[df['job_industry_category'].isnull(), 'job_industry_category'] = lookup['job_title'].map(lookup)
А также удаление na из другого сообщения:
def remove_na(x):
if pd.isnull(x['job_industry_category']):
return freq_job_ind[x['job_title']]
else:
return x['job_industry_category']
df['job_industry_category'] = df.apply(remove_na, axis=1)
Но оба не сработали, и я не уверен, есть ли лучший способ сделать это?
Заранее благодарю вас!
Ответ №1:
Получите недостающие местоположения с помощью isna(), а затем используйте map с set_index .
% ipython
Python 3.8.5 (default, Sep 4 2020, 07:30:14)
Type 'copyright', 'credits' or 'license' for more information
IPython 7.18.1 -- An enhanced Interactive Python. Type '?' for help.
In [1]:
import pandas as pd
import numpy as np
df = pd.DataFrame({
'job_title': ['Executive Secretary', 'Administrative Officer',
'Recruiting Manager', 'Senior Editor', 'Media Manager I'],
'job_industry_category': ['Health', 'Financial Services',
'Property', np.nan, np.nan]})
df
Out[1]:
job_title job_industry_category
0 Executive Secretary Health
1 Administrative Officer Financial Services
2 Recruiting Manager Property
3 Senior Editor NaN
4 Media Manager I NaN
In [2]:
lookup = pd.DataFrame({
'job_title': ['Executive Secretary', 'Senior Editor', 'Media Manager I'],
'job_industry_category': ['Retail', 'Manufacturing', 'Health']})
lookup
Out[2]:
job_title job_industry_category
0 Executive Secretary Retail
1 Senior Editor Manufacturing
2 Media Manager I Health
In [3]:
missing = df['job_industry_category'].isna()
In [4]:
df.loc[missing, 'job_industry_category'] = df.loc[missing, 'job_title'].map(
lookup.set_index('job_title')['job_industry_category'])
df
Out[4]:
job_title job_industry_category
0 Executive Secretary Health
1 Administrative Officer Financial Services
2 Recruiting Manager Property
3 Senior Editor Manufacturing
4 Media Manager I Health
Ответ №2:
#Boolean select NaN
m=df.job_industry_category.isna()
#Mask the NaNs and map across values using a dict of lookup['job_title']:lookup['job_industry_category'] df.loc[m,'job_industry_category']=df.loc[m,'job_title'].map(dict(zip(lookup.job_title,lookup.job_industry_category)))
job_title job_industry_category
0 Executive Secretary Health
1 Administrative Officer Financial Services
2 Recruiting Manager Property
3 Senior Editor Manufacturing
4 Media Manager I Health