#sql #pyspark #apache-spark-sql
Вопрос:
Я преобразую столбец сокращений штатов в названия штатов, но я также хотел бы получить количество названий каждого штата. Когда я пытаюсь добавить SELECT state,count(*) as count
в начало своего запроса, я в конечном итоге получаю ошибки. Где я могу добавить функцию подсчета, чтобы получить выходные данные с именами состояний и количеством каждого из них?
Код, который я использую
query = """
SELECT
CASE
WHEN state = 'AL' THEN 'Alabama'
WHEN state = 'AK' THEN 'Alaska'
WHEN state = 'AZ' THEN 'Arizona'
WHEN state = 'AR' THEN 'Arkansas'
WHEN state = 'CA' THEN 'California'
WHEN state = 'CO' THEN 'Colorado'
WHEN state = 'CT' THEN 'Connecticut'
WHEN state = 'DE' THEN 'Delaware'
WHEN state = 'DC' THEN 'District of Columbia'
WHEN state = 'FL' THEN 'Florida'
WHEN state = 'GA' THEN 'Georgia'
WHEN state = 'HI' THEN 'Hawaii'
WHEN state = 'ID' THEN 'Idaho'
WHEN state = 'IL' THEN 'Illinois'
WHEN state = 'IN' THEN 'Indiana'
WHEN state = 'IA' THEN 'Iowa'
WHEN state = 'KS' THEN 'Kansas'
WHEN state = 'KY' THEN 'Kentucky'
WHEN state = 'LA' THEN 'Louisiana'
WHEN state = 'ME' THEN 'Maine'
WHEN state = 'MD' THEN 'Maryland'
WHEN state = 'MA' THEN 'Massachusetts'
WHEN state = 'MI' THEN 'Michigan'
WHEN state = 'MN' THEN 'Minnesota'
WHEN state = 'MS' THEN 'Mississippi'
WHEN state = 'MO' THEN 'Missouri'
WHEN state = 'MT' THEN 'Montana'
WHEN state = 'NE' THEN 'Nebraska'
WHEN state = 'NV' THEN 'Nevada'
WHEN state = 'NH' THEN 'New Hampshire'
WHEN state = 'NJ' THEN 'New Jersey'
WHEN state = 'NM' THEN 'New Mexico'
WHEN state = 'NY' THEN 'New York'
WHEN state = 'NC' THEN 'North Carolina'
WHEN state = 'ND' THEN 'North Dakota'
WHEN state = 'OH' THEN 'Ohio'
WHEN state = 'OK' THEN 'Oklahoma'
WHEN state = 'OR' THEN 'Oregon'
WHEN state = 'PA' THEN 'Pennsylvania'
WHEN state = 'RI' THEN 'Rhode Island'
WHEN state = 'SC' THEN 'South Carolina'
WHEN state = 'SD' THEN 'South Dakota'
WHEN state = 'TN' THEN 'Tennessee'
WHEN state = 'TX' THEN 'Texas'
WHEN state = 'UT' THEN 'Utah'
WHEN state = 'VT' THEN 'Vermont'
WHEN state = 'VA' THEN 'Virginia'
WHEN state = 'WA' THEN 'Washington'
WHEN state = 'WV' THEN 'West Virginia'
WHEN state = 'WI' THEN 'Wisconsin'
WHEN state = 'WY' THEN 'Wyoming'
WHEN state = 'AB' THEN 'Alberta'
WHEN state = 'BC' THEN 'British Columbia'
WHEN state = 'MB' THEN 'Manitoba'
WHEN state = 'NM' THEN 'New Brunswick'
WHEN state = 'NL' THEN 'Newfoundland and Labrador'
WHEN state = 'NT' THEN 'Northwest Territories'
WHEN state = 'NS' THEN 'Nova Scotia'
WHEN state = 'NU' THEN 'Nunavut'
WHEN state = 'ON' THEN 'Ontario'
WHEN state = 'PE' THEN 'Prince Edward Island'
WHEN state = 'QC' THEN 'Quebec'
WHEN state = 'SK' THEN 'Saskatchewan'
WHEN state = 'YT' THEN 'Yukon Territory'
END AS state
FROM business
"""
result = spark.sql(query)
result.show()
Вот что я в итоге получаю:
Но я бы хотел, чтобы это выглядело так, только с полными названиями штатов вместо сокращений:
Ответ №1:
Вот, пожалуйста:
SELECT CASE WHEN state = 'AL' THEN 'Alabama' WHEN state = 'AK' THEN 'Alaska' WHEN state = 'AZ' THEN 'Arizona' WHEN state = 'AR' THEN 'Arkansas' WHEN state = 'CA' THEN 'California' WHEN state = 'CO' THEN 'Colorado' WHEN state = 'CT' THEN 'Connecticut' WHEN state = 'DE' THEN 'Delaware' WHEN state = 'DC' THEN 'District of Columbia' WHEN state = 'FL' THEN 'Florida' WHEN state = 'GA' THEN 'Georgia' WHEN state = 'HI' THEN 'Hawaii' WHEN state = 'ID' THEN 'Idaho' WHEN state = 'IL' THEN 'Illinois' WHEN state = 'IN' THEN 'Indiana' WHEN state = 'IA' THEN 'Iowa' WHEN state = 'KS' THEN 'Kansas' WHEN state = 'KY' THEN 'Kentucky' WHEN state = 'LA' THEN 'Louisiana' WHEN state = 'ME' THEN 'Maine' WHEN state = 'MD' THEN 'Maryland' WHEN state = 'MA' THEN 'Massachusetts' WHEN state = 'MI' THEN 'Michigan' WHEN state = 'MN' THEN 'Minnesota' WHEN state = 'MS' THEN 'Mississippi' WHEN state = 'MO' THEN 'Missouri' WHEN state = 'MT' THEN 'Montana' WHEN state = 'NE' THEN 'Nebraska' WHEN state = 'NV' THEN 'Nevada' WHEN state = 'NH' THEN 'New Hampshire' WHEN state = 'NJ' THEN 'New Jersey' WHEN state = 'NM' THEN 'New Mexico' WHEN state = 'NY' THEN 'New York' WHEN state = 'NC' THEN 'North Carolina' WHEN state = 'ND' THEN 'North Dakota' WHEN state = 'OH' THEN 'Ohio' WHEN state = 'OK' THEN 'Oklahoma' WHEN state = 'OR' THEN 'Oregon' WHEN state = 'PA' THEN 'Pennsylvania' WHEN state = 'RI' THEN 'Rhode Island' WHEN state = 'SC' THEN 'South Carolina' WHEN state = 'SD' THEN 'South Dakota' WHEN state = 'TN' THEN 'Tennessee' WHEN state = 'TX' THEN 'Texas' WHEN state = 'UT' THEN 'Utah' WHEN state = 'VT' THEN 'Vermont' WHEN state = 'VA' THEN 'Virginia' WHEN state = 'WA' THEN 'Washington' WHEN state = 'WV' THEN 'West Virginia' WHEN state = 'WI' THEN 'Wisconsin' WHEN state = 'WY' THEN 'Wyoming' WHEN state = 'AB' THEN 'Alberta' WHEN state = 'BC' THEN 'British Columbia' WHEN state = 'MB' THEN 'Manitoba' WHEN state = 'NM' THEN 'New Brunswick' WHEN state = 'NL' THEN 'Newfoundland and Labrador' WHEN state = 'NT' THEN 'Northwest Territories' WHEN state = 'NS' THEN 'Nova Scotia' WHEN state = 'NU' THEN 'Nunavut' WHEN state = 'ON' THEN 'Ontario' WHEN state = 'PE' THEN 'Prince Edward Island' WHEN state = 'QC' THEN 'Quebec' WHEN state = 'SK' THEN 'Saskatchewan' WHEN state = 'YT' THEN 'Yukon Territory' END AS state,
COUNT(*)
FROM business
GROUP BY state