#apache-spark #amazon-emr #thrift #dbt
Вопрос:
Я настроил DBT Spark для работы с EMR.
Моя модель выглядит так:
# mtcars_agg.py {{ config( materialized='table', file_format='parquet', location_root='s3://my-bucket/my-subpath/my-folder' ) }} select cyl, avg(disp) as avg_disp, avg(hp) as avg_hp, avg(wt) as avg_wt from {{source('testes_dbt', 'mtcars')}} group by cyl order by cyl asc;
Я запускаю dbt run, но выполнение никогда не завершается. Журнал выполнения приведён ниже:
2021-11-25 02:55:17.818135 (Thread-1): /* {"app": "dbt", "dbt_version": "0.21.0", "profile_name": "default", "target_name": "prod", "node_id": "model.my_new_project.mtcars_agg"} */ create table testes_dbt.mtcars_agg using parquet location 's3://my-bucket/my-subpath/my-folder/mtcars_agg' as select cyl, avg(disp) as avg_disp, avg(hp) as avg_hp, avg(wt) as avg_wt from testes_dbt.mtcars group by cyl order by cyl asc 2021-11-25 02:55:17.818735 (Thread-1): TExecuteStatementReq(sessionHandle=TSessionHandle(sessionId=THandleIdentifier(guid=b'x16x12x908x85xbaCx15x8bxa6Bx1ex884Wd', secret=b'x1fxe6x02xb5x15xc3Coxbcxeax0bzx8exe7xdbxa1')), statement='/* {"app": "dbt", "dbt_version": "0.21.0", "profile_name": "default", "target_name": "prod", "node_id": "model.my_new_project.mtcars_agg"} */nn create table testes_dbt.mtcars_aggn n n using parquetn n n n n location 's3://my-bucket/my-subpath/my-folder/mtcars_agg'n n asn nselect n cyl, n avg(disp) as avg_disp, n avg(hp) as avg_hp, n avg(wt) as avg_wtnfrom testes_dbt.mtcarsngroup by cylnorder by cyl asc', confOverlay=None, runAsync=True, queryTimeout=0) 2021-11-25 02:55:17.966240 (Thread-1): TExecuteStatementResp(status=TStatus(statusCode=0, infoMessages=None, sqlState=None, errorCode=None, errorMessage=None), operationHandle=TOperationHandle(operationId=THandleIdentifier(guid=b'x00V$lxafx94Lxb8xadxe1x8fT3qix11', secret=b'xccx10x1cxb0xccx10B[xb7xce6xe2hxd0xb9e'), operationType=0, hasResultSet=True, modifiedRowCount=None)) 2021-11-25 02:55:23.116794 (Thread-1): TGetOperationStatusResp(status=TStatus(statusCode=0, infoMessages=None, sqlState=None, errorCode=None, errorMessage=None), operationState=1, sqlState=None, errorCode=None, errorMessage=None, taskStatus=None, operationStarted=None, operationCompleted=None, hasResultSet=None, progressUpdateResponse=None) 2021-11-25 02:55:23.117370 (Thread-1): Poll status: 1, sleeping ... ... 
2021-11-25 02:55:23.116794 (Thread-1): TGetOperationStatusResp(status=TStatus(statusCode=0, infoMessages=None, sqlState=None, errorCode=None, errorMessage=None), operationState=1, sqlState=None, errorCode=None, errorMessage=None, taskStatus=None, operationStarted=None, operationCompleted=None, hasResultSet=None, progressUpdateResponse=None) 2021-11-25 02:55:23.117370 (Thread-1): Poll status: 1, sleeping
Я проверил соединение с помощью dbt debug, и всё в порядке.
Кто-нибудь знает, что означают эти записи в журнале?