
Spark Failure: Caused by: org.apache.spark.shuffle.FetchFailedException: Too large frame: 5454002341

I am generating a hierarchy for a table by resolving the parent-child relationship between rows (each row's parent_id refers to another row's id).

The following configuration is in use, yet the job still fails with the "Too large frame" error:

Spark Properties:

--conf spark.yarn.executor.memoryOverhead=1024mb
--conf yarn.nodemanager.resource.memory-mb=12288mb
--driver-memory 32g
--driver-cores 8
--executor-cores 32
--num-executors 8
--executor-memory 256g
--conf spark.maxRemoteBlockSizeFetchToMem=15g
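
The same sizing could also be supplied when the session is built rather than on the spark-submit command line. A minimal sketch, where the application name is a placeholder and the values simply mirror the flags above:

import org.apache.spark.sql.SparkSession

// Sketch only: mirrors the spark-submit flags above as builder config.
// Driver memory/cores generally still have to be given at submit time
// (before the driver JVM starts), and yarn.nodemanager.resource.memory-mb
// is a YARN NodeManager (yarn-site.xml) setting rather than a Spark conf.
val spark = SparkSession.builder
  .appName("wtransaction-hierarchy")                     // placeholder name
  .config("spark.yarn.executor.memoryOverhead", "1024")  // interpreted in MiB
  .config("spark.executor.cores", "32")
  .config("spark.executor.instances", "8")
  .config("spark.executor.memory", "256g")
  .config("spark.maxRemoteBlockSizeFetchToMem", "15g")
  .enableHiveSupport()
  .getOrCreate()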

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions._

lazy val sparkSession = SparkSession.builder.enableHiveSupport().getOrCreate()

import sparkSession.implicits._

// Source table with the incremental transactions; repartition returns a new
// DataFrame, so it has to be part of the assignment to take effect.
val hiveEmp: DataFrame = sparkSession
  .sql("select * from dl_pp_wrk_tables_dev.wtransaction_incremental_interim")
  .repartition(300)

// Number of parent levels to resolve.
val nestedLevel = 3

// Self-join the table nestedLevel times: wd0 is the base row and wd1..wd3 are
// its successive parents (parent_id at level j joined to id at level j + 1).
val empHierarchy = (1 to nestedLevel)
  .foldLeft(hiveEmp.as("wd0")) { (wDf, i) =>
    val j = i - 1
    wDf.join(hiveEmp.as(s"wd$i"), col(s"wd$j.parent_id") === col(s"wd$i.id"), "left_outer")
  }
  .select(
    col("wd0.id") :: col("wd0.parent_id") ::
    col("wd0.amount").as("amount") :: col("wd0.payment_id").as("payment_id") :: (
      (1 to nestedLevel).toList.map(i => col(s"wd$i.amount").as(s"amount_$i")) :::
      (1 to nestedLevel).toList.map(i => col(s"wd$i.payment_id").as(s"payment_id_$i"))
    ): _*
  )

empHierarchy.write.saveAsTable("wtransaction_hierarchy_test4")
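
For illustration only, here is a tiny hypothetical frame with the same four columns (the ids, amounts and payment ids are made up) and the same self-join applied to it, just to show the shape of the output the job produces. It relies on the sparkSession.implicits._ import above:

// Toy stand-in for the Hive table, used only to illustrate the flattening.
val toy = Seq[(Long, Option[Long], Double, String)](
  (1L, None,     10.0, "p1"), // root: no parent
  (2L, Some(1L), 20.0, "p2"), // child of 1
  (3L, Some(2L), 30.0, "p3")  // grandchild of 1
).toDF("id", "parent_id", "amount", "payment_id")

// Same construction as above, restricted to the amount columns.
val toyHierarchy = (1 to 3).foldLeft(toy.as("wd0")) { (df, i) =>
  val j = i - 1
  df.join(toy.as(s"wd$i"), col(s"wd$j.parent_id") === col(s"wd$i.id"), "left_outer")
}.select(
  (col("wd0.id") :: col("wd0.parent_id") ::
    (1 to 3).toList.map(i => col(s"wd$i.amount").as(s"amount_$i"))): _*
)

toyHierarchy.show()
// For id = 3: amount_1 = 20.0 (parent), amount_2 = 10.0 (grandparent), amount_3 = null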

ERROR:

Caused by: org.apache.spark.SparkException: Task failed while writing rows
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:204)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:129)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:128)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    at org.apache.spark.scheduler.Task.run(Task.scala:99)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
    ... 3 more
Caused by: org.apache.spark.shuffle.FetchFailedException: Too large frame: 5454002341
    at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:361)
    at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:336)

