#python #pandas #filter #pandas-groupby #nan
Вопрос:
Набор данных: https://dl.dropboxusercontent.com/s/v9gmgxupkypn5dw/train-data.csv
Используя предоставленный словарь conversion_table, скорректируйте New_Price с учётом инфляции, выполнив следующие действия:
a Make a function called inflation that inputs a dataframe.
b Make a new variable conversion. This is a column with the values of conversion_table that matches the column Year as the key.
c Remove any non-numerical characters in the column New_Price. Replace the New_Price with that change.
d Convert the column type New_Price into float. Replace the New_Price with that change.
e Multiply New_Price with conversion. Replace the New_Price with that change.
f Return the dataframe.
Then, Print Year, New_Price, and New_Price_Adjusted.
Мой код приведён ниже:
# Mean Price per Location and the mean Price over the whole frame; both are
# reused by sections 1 and 2 below.
location_means = df.groupby("Location")["Price"].mean()
overall_mean = df["Price"].mean()

# 1
# Locations whose mean Price is below the overall mean (boolean mask, then
# the matching means).
bool_mean_less_than_average = location_means < overall_mean
filtered_less = location_means[bool_mean_less_than_average]
# NOTE(review): the variables above select "less than average", but the
# printed filter keeps locations whose mean is GREATER than the overall
# mean - confirm which direction is intended.
rows_above_average = df.groupby("Location").filter(
    lambda group: group["Price"].mean() > overall_mean
)
print(rows_above_average["Location"].unique())

# 2
# Locations whose mean Price is above the overall mean (boolean mask, then
# the matching means).
bool_mean_higher_than_average = location_means > overall_mean
filtered_higher = location_means[bool_mean_higher_than_average]
# NOTE(review): same mismatch as in section 1 - the printed filter keeps
# locations whose mean is LOWER than the overall mean.
rows_below_average = df.groupby("Location").filter(
    lambda group: group["Price"].mean() < overall_mean
)
print(rows_below_average["Location"].unique())
#3
# Inflation multipliers keyed by calendar year (int). A price from a given
# year is multiplied by its factor; 2019 maps to 1.0, so it is presumably the
# reference year - TODO confirm against the data source.
conversion_table = {
    1998: 3.7327,
    1999: 3.2372,
    2000: 3.2216,
    2001: 3.1133,
    2002: 2.9603,
    2003: 2.8694,
    2004: 2.7662,
    2005: 2.6652,
    2006: 2.5246,
    2007: 2.3702,
    2008: 2.2461,
    2009: 2.0475,
    2010: 1.7809,
    2011: 1.6270,
    2012: 1.5278,
    2013: 1.3743,
    2014: 1.2594,
    2015: 1.1897,
    2016: 1.1189,
    2017: 1.0945,
    2018: 1.0524,
    2019: 1.0,
}
def inflation(df, table=None):
    """Adjust the ``New_Price`` column of *df* for inflation.

    Steps:
      1. Build ``conversion``: one factor per row, looked up from *table*
         using the row's ``Year`` as the key.
      2. Strip every character that is not a digit or a decimal point from
         ``New_Price`` (e.g. ``"8.61 Lakh"`` -> ``"8.61"``). The unit text
         is discarded, not converted.
      3. Convert ``New_Price`` to float.
      4. Multiply ``New_Price`` by ``conversion`` and round to 1 decimal.

    Args:
        df: DataFrame with ``Year`` and ``New_Price`` columns. It is
            modified in place and also returned.
        table: Optional mapping ``year -> factor``. Defaults to the
            module-level ``conversion_table``.

    Returns:
        The same DataFrame with ``New_Price`` replaced by the adjusted
        float values.

    Raises:
        KeyError: If a ``Year`` value has no entry in *table*.

    Note:
        Rows whose ``New_Price`` is missing or contains no number stay NaN
        after the conversion - there is nothing to adjust. Remove them with
        ``df.dropna(subset=["New_Price"])`` if NaN must not appear in the
        output.
    """
    if table is None:
        table = conversion_table

    # Per-row factor, so the function is correct for a frame that mixes
    # several years (not only for a single-year group).
    conversion = df["Year"].map(table)
    unknown_years = df.loc[conversion.isna(), "Year"].unique()
    if len(unknown_years):
        raise KeyError(
            f"no conversion factor for year(s): {list(unknown_years)}"
        )

    # Work on the text form so the regex also copes with non-string cells;
    # missing values end up empty or missing and are coerced to NaN below.
    cleaned = df["New_Price"].astype(str).str.replace(
        r"[^0-9.]", "", regex=True
    )
    df["New_Price"] = pd.to_numeric(cleaned, errors="coerce").astype(float)
    df["New_Price"] = (df["New_Price"] * conversion).round(1)
    return df
#4
# The original call discarded the result of groupby(...).apply(inflation),
# so the frame printed below was never updated. Run inflation on a copy of
# each Year group, stitch the groups back together, restore the original row
# order and keep the result in df.
adjusted_groups = [
    inflation(group.copy()) for _, group in df.groupby("Year")
]
df = pd.concat(adjusted_groups).sort_index()
print(df[["Year", "New_Price"]])
В выводе не должно быть значений NaN. Как исправить эту проблему?