Matplotlib: как исправить группировку по полу при чтении данных из файла

#python-3.x #matplotlib

Вопрос:

Я пытаюсь построить сгруппированную столбчатую диаграмму, чтобы показать общее количество каждой из трёх олимпийских медалей (золотой, серебряной и бронзовой) с разбивкой по полу. Проблема в том, что на оси X выводится пол каждого отдельного спортсмена (то есть много раз, см. здесь), вместо того чтобы «M» и «F» выводились по одному разу для всех спортсменов.

Код (неполный: я ещё работаю над фильтрацией медалей по странам и олимпийскому сезону — зима/лето):

 import matplotlib.pyplot as plt
import csv


def GenerateBarChart(a, b):
    """Draw the Gold, Silver and Bronze bars, label the axes and show the figure.

    Reads the module-level names ``x1``, ``x2``, ``x3`` (bar positions),
    ``Gold``, ``Silver``, ``Bronze`` (bar heights), ``s`` (bar width) and
    ``ax`` (the axes that receives labels, legend and ticks).

    Args:
        a: Tick positions passed to ``ax.set_xticks``.
        b: Tick labels passed to ``ax.set_xticklabels``.
    """
    plt.bar(x1, Gold, s, color='plum', label='Gold')
    plt.bar(x2, Silver, s, color='peru', label='Silver')
    plt.bar(x3, Bronze, s, color='darkblue', label='Bronze')
    ax.set_ylabel('Values')
    ax.set_title('Grouped')
    ax.legend()
    ax.set_xticks(a)
    ax.set_xticklabels(b)
    plt.xticks(fontsize=5.5)
    plt.show()
    # A bare `plt.close` only references the function; it has to be called
    # for the figure to actually be closed.
    plt.close()


def PrintBarChart():
    """Show the chart using ``x1`` as tick positions and ``Sex`` as tick labels."""
    GenerateBarChart(a=x1, b=Sex)


# Values collected from the CSV file, one entry per data row.
Sex = []
NOC = []
Season = []
Medal = []

with open('compactado.csv', 'r', newline="") as arq:
    reader = csv.reader(arq)
    # Skip the header row; the default keeps an empty file from raising
    # StopIteration.
    next(reader, None)
    # Keep iterating the same reader instead of wrapping the file a second time.
    for row in reader:
        if not row:
            # An empty row is treated as the end of the data.
            break
        Sex.append(row[2])
        # Commas are stripped from the remaining fields before storing them.
        Season.append(row[10].replace(',', ''))
        NOC.append(row[7].replace(',', ''))
        Medal.append(row[14].replace(',', ''))

# Width of a single bar; also used as the offset between neighbouring bars.
s = 0.3

# Bar positions: x1 holds one position per row of data, x2 and x3 are the
# same positions shifted one bar width to the left and to the right.
x1 = range(len(Sex))
x2 = [x - s for x in x1]
x3 = [x + s for x in x1]

Season = list(map(str, Season))
NOC = list(map(str, NOC))
Medal = list(map(str, Medal))

# `Medal` is a list, so comparing it with a string is always False and the
# totals would stay at 0. Count the matching entries instead.
Gold = Medal.count("Gold")
Silver = Medal.count("Silver")
Bronze = Medal.count("Bronze")

fig, ax = plt.subplots()
 

Часть файла:

 ID,"Name","Sex","Age","Height","Weight","Team","NOC","Games","Year","Season","City","Sport","Event","Medal"
1,"Arvo Ossian Aaltonen","M",22,NA,NA,"Finland","FIN","1912 Summer",1912,"Summer","Stockholm","Swimming","Swimming Men's 200 metres Breaststroke",NA
1,"Arvo Ossian Aaltonen","M",22,NA,NA,"Finland","FIN","1912 Summer",1912,"Summer","Stockholm","Swimming","Swimming Men's 400 metres Breaststroke",NA
1,"Arvo Ossian Aaltonen","M",30,NA,NA,"Finland","FIN","1920 Summer",1920,"Summer","Antwerpen","Swimming","Swimming Men's 200 metres Breaststroke","Bronze"
1,"Arvo Ossian Aaltonen","M",30,NA,NA,"Finland","FIN","1920 Summer",1920,"Summer","Antwerpen","Swimming","Swimming Men's 400 metres Breaststroke","Bronze"
2,"Juhamatti Tapio Aaltonen","M",28,184,85,"Finland","FIN","2014 Winter",2014,"Winter","Sochi","Ice Hockey","Ice Hockey Men's Ice Hockey","Bronze"
3,"Paavo Johannes Aaltonen","M",28,175,64,"Finland","FIN","1948 Summer",1948,"Summer","London","Gymnastics","Gymnastics Men's Team All-Around","Gold"
4,"Kjetil Andr Aamodt","M",22,176,85,"Norway","NOR","1994 Winter",1994,"Winter","Lillehammer","Alpine Skiing","Alpine Skiing Men's Downhill","Silver"
5,"Leila Abdelmoez","F",19,160,46,"Egypt","EGY","2016 Summer",2016,"Summer","Rio de Janeiro","Synchronized Swimming","Synchronized Swimming Women's Team",NA
6,"Reema Abdo","F",21,173,59,"Canada","CAN","1984 Summer",1984,"Summer","Los Angeles","Swimming","Swimming Women's 4 x 100 metres Medley Relay","Bronze"
7,"Mona Ahmad Abdulaziz Hassanein","F",26,166,61,"Egypt","EGY","2012 Summer",2012,"Summer","London","Fencing","Fencing Women's epee, Individual",NA
8,"Zagalav Abdulbekovich Abdulbekov","M",26,160,62,"Soviet Union","URS","1972 Summer",1972,"Summer","Munich","Wrestling","Wrestling Men's Featherweight, Freestyle","Gold"
9,"Irene Abel","F",19,160,48,"East Germany","GDR","1972 Summer",1972,"Summer","Munich","Gymnastics","Gymnastics Women's Team All-Around","Silver"
10,"Nicola Virginia Adams","F",33,164,51,"Great Britain","GBR","2016 Summer",2016,"Summer","Rio de Janeiro","Boxing","Boxing Women's Flyweight","Gold"
 

Ответ №1:

Чтобы не изобретать велосипед, предлагаю загрузить набор данных в объект DataFrame с помощью функции read_csv из пакета pandas и построить сгруппированную столбчатую диаграмму с помощью пакета seaborn, который построен поверх matplotlib и даёт простой способ создавать графики такого типа (и многие другие). Вот как можно построить график по образцу данных с помощью функции countplot:

 import pandas as pd    # v 1.2.5
import seaborn as sns  # v 0.11.1

# Load the CSV file into a DataFrame.
df = pd.read_csv('compactado.csv')
# Preview the first rows; the result is not assigned, so it is only visible
# in an interactive session or notebook.
df.head()
 

df

 # Count the rows for each 'Medal' value and split every count by 'Sex' (hue).
 sns.countplot(data=df, x='Medal', hue='Sex')
 

график подсчета