#azure-sql-database #databricks #azure-databricks #delta-lake #databricks-sql
Question:
I tried to create a new table from a Delta table and add a new NULL column using Databricks SQL. Databricks cannot create the NULL column; if I populate the newly created column instead, everything works fine. How can I add a NULL column to a new table based on an existing Delta table?
It does not work when the value is NULL.
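A minimal sketch of the failing statement (the original query was not included in the post; the table and column names here are hypothetical, inferred from the error trace below):

CREATE TABLE new_table AS
SELECT null AS parent_name,   -- untyped NULL: no column type can be inferred
       source_id,
       rol_code
FROM existing_delta_table;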
When I populate the column, it works.
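Presumably because the type can be inferred from a concrete value, the same sketch succeeds when the column is filled:

CREATE TABLE new_table AS
SELECT 'some_value' AS parent_name,   -- concrete value, type inferred as STRING
       source_id,
       rol_code
FROM existing_delta_table;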
The NULL version returns the following error:
com.databricks.backend.common.rpc.DatabricksExceptions$SQLExecutionException: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Binding attribute, tree: Parent_name#23885
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.catalyst.expressions.BindReferences$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:75)
    at org.apache.spark.sql.catalyst.expressions.BindReferences$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:74)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:484)
    at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:74)
    at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:68)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:657)
    at com.databricks.backend.daemon.driver.SQLDriverLocal.executeSql(SQLDriverLocal.scala:115)
    at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:221)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: Couldn't find Parent_name#23885 in [Source_id#23886,Rol_code#23887]
    at scala.sys.package$.error(package.scala:30)
    at org.apache.spark.sql.catalyst.expressions.BindReferences$anonfun$bindReference$1.$anonfun$applyOrElse$1(BoundAttribute.scala:81)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 85 more
Answer #1:
You cannot have a column with a null value without specifying a type for that column, so you need to pick a concrete type and cast the null to it.
Change the code from select null as parent_name, .... to select cast(null as string) as parent_name, .... (adjust the type as needed).
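Applied to the hypothetical sketch above, the corrected statement would look like this (string is assumed as the target type):

CREATE TABLE new_table AS
SELECT CAST(null AS STRING) AS parent_name,   -- typed NULL: the new column gets an explicit STRING type
       source_id,
       rol_code
FROM existing_delta_table;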