Spark 各個版本的 Application 調度算法仍有著明顯的不同之處:從 Spark 1.3.0 到 Spark 1.6.1,再從 Spark 2.0 到目前最新的 Spark 3.0,調度算法都有一定的修改。下面我們一起來學習最新版本 Spark 3.0 的 Application 調度機制。
/**
 * Simple FIFO scheduler: walk the waiting-app queue in order and try to
 * satisfy each app's outstanding core demand on the currently usable workers.
 */
private def startExecutorsOnWorkers(): Unit = {
  waitingApps.foreach { app =>
    // Cores each executor gets; defaults to 1 when not set via spark-submit.
    val coresPerExecutor = app.desc.coresPerExecutor.getOrElse(1)
    // If the cores left is less than coresPerExecutor, they will not be allocated:
    // skip apps whose remaining demand is below one executor's worth of cores.
    if (app.coresLeft >= coresPerExecutor) {
      // ALIVE workers with enough free resources to host one executor of this
      // app, ordered by free cores descending.
      val usableWorkers = workers.toArray
        .filter(w => w.state == WorkerState.ALIVE && canLaunchExecutor(w, app.desc))
        .sortBy(_.coresFree)
        .reverse
      if (waitingApps.length == 1 && usableWorkers.isEmpty) {
        logWarning(s"App ${app.id} requires more resource than any of Workers could have.")
      }
      // Decide how many cores each usable worker contributes
      // (spread-out scheduling is the default).
      val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)
      // Turn the per-worker core counts into actual executor launches.
      usableWorkers.indices.filter(assignedCores(_) > 0).foreach { pos =>
        allocateWorkerResourceToExecutors(
          app, assignedCores(pos), app.desc.coresPerExecutor, usableWorkers(pos))
      }
    }
  }
}
判斷一個 worker 是否可以啟動 executor
/**
 * A worker can host an executor for this application iff it has the memory,
 * CPU cores and custom resources that one executor of the app requires.
 */
private def canLaunchExecutor(worker: WorkerInfo, desc: ApplicationDescription): Boolean = {
  // Unset coresPerExecutor means a single-core minimum per executor.
  val coresNeeded = desc.coresPerExecutor.getOrElse(1)
  canLaunch(worker, desc.memoryPerExecutorMB, coresNeeded, desc.resourceReqsPerExecutor)
}
讓我們看一看裏面的 canLaunch 方法
/**
 * Generic fitness check: does `worker` currently have at least the requested
 * amount of memory (MB), CPU cores and custom resources free?
 */
private def canLaunch(
    worker: WorkerInfo,
    memoryReq: Int,
    coresReq: Int,
    resourceRequirements: Seq[ResourceRequirement])
  : Boolean = {
  // All three resource dimensions must fit; the cheap scalar comparisons
  // short-circuit before the custom-resource check.
  worker.memoryFree >= memoryReq &&
    worker.coresFree >= coresReq &&
    ResourceUtils.resourcesMeetRequirements(worker.resourcesAmountFree, resourceRequirements)
}
回到上面的 scheduleExecutorsOnWorkers
/**
 * Decide how many cores on each usable worker to give to this application.
 *
 * Returns an array parallel to `usableWorkers`: entry i is the number of cores
 * assigned on worker i. Cores are handed out `minCoresPerExecutor` at a time,
 * either round-robin across workers (spreadOutApps = true) or by filling one
 * worker completely before moving on to the next.
 */
private def scheduleExecutorsOnWorkers(
app: ApplicationInfo,
usableWorkers: Array[WorkerInfo],
spreadOutApps: Boolean): Array[Int] = {
val coresPerExecutor = app.desc.coresPerExecutor
val minCoresPerExecutor = coresPerExecutor.getOrElse(1)
// When coresPerExecutor is unset (the default), each worker runs at most one
// executor for this app and that executor just grows; when it is set, a worker
// may host several executors if it has enough resources.
val oneExecutorPerWorker = coresPerExecutor.isEmpty
val memoryPerExecutor = app.desc.memoryPerExecutorMB
val resourceReqsPerExecutor = app.desc.resourceReqsPerExecutor
val numUsable = usableWorkers.length
val assignedCores = new Array[Int](numUsable) // Number of cores to give to each worker
val assignedExecutors = new Array[Int](numUsable) // Number of new executors on each worker
// Cap the demand by what the usable workers can actually supply.
var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)
// Whether the worker at `pos` can take one more allocation step for this app.
def canLaunchExecutorForApp(pos: Int): Boolean = {
val keepScheduling = coresToAssign >= minCoresPerExecutor
val enoughCores = usableWorkers(pos).coresFree - assignedCores(pos) >= minCoresPerExecutor
val assignedExecutorNum = assignedExecutors(pos)
// If we allow multiple executors per worker, then we can always launch new executors.
// Otherwise, if there is already an executor on this worker, just give it more cores.
// launchingNewExecutor is true when coresPerExecutor was set in spark-submit,
// or when this worker has not yet been assigned an executor for this app.
val launchingNewExecutor = !oneExecutorPerWorker || assignedExecutorNum == 0
// Starting a new executor: memory, custom resources and the executor-count
// limit must all be checked in addition to cores.
if (launchingNewExecutor) {
val assignedMemory = assignedExecutorNum * memoryPerExecutor
val enoughMemory = usableWorkers(pos).memoryFree - assignedMemory >= memoryPerExecutor
val assignedResources = resourceReqsPerExecutor.map {
req => req.resourceName -> req.amount * assignedExecutorNum
}.toMap
// Custom resources still free on this worker after the allocations already
// planned in this scheduling round.
val resourcesFree = usableWorkers(pos).resourcesAmountFree.map {
case (rName, free) => rName -> (free - assignedResources.getOrElse(rName, 0))
}
val enoughResources = ResourceUtils.resourcesMeetRequirements(
resourcesFree, resourceReqsPerExecutor)
val underLimit = assignedExecutors.sum + app.executors.size < app.executorLimit
keepScheduling && enoughCores && enoughMemory && enoughResources && underLimit
} else {
// We're adding cores to an existing executor, so no need
// to check memory and executor limits
keepScheduling && enoughCores
}
}
// Keep launching executors until no more workers can accommodate any
// more executors, or if we have reached this application's limits
var freeWorkers = (0 until numUsable).filter(canLaunchExecutorForApp)
while (freeWorkers.nonEmpty) {
freeWorkers.foreach { pos =>
var keepScheduling = true
while (keepScheduling && canLaunchExecutorForApp(pos)) {
coresToAssign -= minCoresPerExecutor
assignedCores(pos) += minCoresPerExecutor
// If we are launching one executor per worker, then every iteration assigns 1 core
// to the executor. Otherwise, every iteration assigns cores to a new executor.
if (oneExecutorPerWorker) {
// Single growing executor: each step adds minCoresPerExecutor cores
// (1 by default) to the one executor on this worker.
assignedExecutors(pos) = 1
} else {
// Multiple executors per worker: each step plans a whole new executor
// with minCoresPerExecutor (the spark-submit coresPerExecutor value).
assignedExecutors(pos) += 1
}
// Spreading out an application means spreading out its executors across as
// many workers as possible. If we are not spreading out, then we should keep
// scheduling executors on this worker until we use all of its resources.
// Otherwise, just move on to the next worker.
if (spreadOutApps) {
// Round-robin: take only one allocation step per worker per pass, then
// move on to the next worker.
keepScheduling = false
}
}
}
freeWorkers = freeWorkers.filter(canLaunchExecutorForApp)
}
// Per-worker core counts, parallel to usableWorkers.
assignedCores
}
再來分析 allocateWorkerResourceToExecutors
/**
 * Materialize the cores assigned to one worker as concrete executors: reserve
 * the worker's resources, register each executor with the app, and tell the
 * worker to launch it.
 */
private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    assignedCores: Int,
    coresPerExecutor: Option[Int],
    worker: WorkerInfo): Unit = {
  // If the number of cores per executor is specified, we divide the cores assigned
  // to this worker evenly among the executors with no remainder.
  // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
  val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
  val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
  (1 to numExecutors).foreach { _ =>
    // Reserve the worker's custom resources for this executor.
    val allocated = worker.acquireResources(app.desc.resourceReqsPerExecutor)
    // Record one more executor on this application.
    val exec = app.addExecutor(worker, coresToAssign, allocated)
    // Ask the worker to actually start the executor process.
    launchExecutor(worker, exec)
    app.state = ApplicationState.RUNNING
  }
}
ok,至此,Spark 最新版本 Spark 3.0 的 Application 調度算法就分析完畢了!