Analysis of VM Cold Migration and Resize Code (Part 2)

  In the previous part we analyzed the separate upper-layer logic of cold migration and resize; this part walks through the lower-level code path the two operations share. The compute_api.resize() call seen earlier is implemented by the resize() method in nova/compute/api.py. The code, with comments, is shown below:

@wrap_check_policy
@check_instance_lock
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
           **extra_instance_updates):
    """Resize (ie, migrate) a running instance.

    If flavor_id is None, the process is considered a migration, keeping
    the original flavor_id. If flavor_id is not None, the instance should
    be migrated to a new host and resized to the new flavor_id.
    """
    self._check_auto_disk_config(instance, **extra_instance_updates)

    current_instance_type = instance.get_flavor()

    # If flavor_id is None, keep the current instance type; otherwise
    # use the flavor_id passed in from the API
    if not flavor_id:
        LOG.debug("flavor_id is None. Assuming migration.",
                  instance=instance)
        new_instance_type = current_instance_type
    else:
        new_instance_type = flavors.get_flavor_by_flavor_id(
                flavor_id, read_deleted="no")
        if (new_instance_type.get('root_gb') == 0 and
            current_instance_type.get('root_gb') != 0 and
            not self.is_volume_backed_instance(context, instance)):
            reason = _('Resize to zero disk flavor is not allowed.')
            raise exception.CannotResizeDisk(reason=reason)

    if not new_instance_type:
        raise exception.FlavorNotFound(flavor_id=flavor_id)

    current_instance_type_name = current_instance_type['name']
    new_instance_type_name = new_instance_type['name']
    LOG.debug("Old instance type %(current_instance_type_name)s, "
              "new instance type %(new_instance_type_name)s",
              {'current_instance_type_name': current_instance_type_name,
               'new_instance_type_name': new_instance_type_name},
              instance=instance)

    same_instance_type = (current_instance_type['id'] ==
                          new_instance_type['id'])

    # NOTE(sirp): We don't want to force a customer to change their flavor
    # when Ops is migrating off of a failed host.
    if not same_instance_type and new_instance_type.get('disabled'):
        raise exception.FlavorNotFound(flavor_id=flavor_id)

    if same_instance_type and flavor_id and self.cell_type != 'compute':
        raise exception.CannotResizeToSameFlavor()

    # ensure there is sufficient headroom for upsizes
    if flavor_id:
        # Compute the quota delta required by the resize, mainly vCPUs and RAM
        deltas = compute_utils.upsize_quota_delta(context,
                                                  new_instance_type,
                                                  current_instance_type)
        try:
            # Check and reserve the updated project quota
            quotas = compute_utils.reserve_quota_delta(context, deltas,
                                                       instance)
        except exception.OverQuota as exc:
            quotas = exc.kwargs['quotas']
            overs = exc.kwargs['overs']
            usages = exc.kwargs['usages']
            headroom = self._get_headroom(quotas, usages, deltas)
            (overs, reqs, total_alloweds,
             useds) = self._get_over_quota_detail(headroom, overs, quotas,
                                                  deltas)
            LOG.warning(_LW("%(overs)s quota exceeded for %(pid)s,"
                            " tried to resize instance."),
                        {'overs': overs, 'pid': context.project_id})
            raise exception.TooManyInstances(overs=overs,
                                             req=reqs,
                                             used=useds,
                                             allowed=total_alloweds)
    else:
        quotas = objects.Quotas(context=context)
    # Set the instance task_state to RESIZE_PREP
    instance.task_state = task_states.RESIZE_PREP
    instance.progress = 0
    # Persist the updated instance state to the database
    instance.update(extra_instance_updates)
    instance.save(expected_task_state=[None])

    filter_properties = {'ignore_hosts': []}
    # Decide whether resize to the same host is allowed. If
    # allow_resize_to_same_host is false in nova.conf, add the instance's
    # current host to the ignore_hosts list so the scheduler will not
    # place the instance back on it.
    if not CONF.allow_resize_to_same_host:
        filter_properties['ignore_hosts'].append(instance.host)

    if self.cell_type == 'api':
        # Commit reservations early and create migration record.
        self._resize_cells_support(context, quotas, instance,
                                   current_instance_type,
                                   new_instance_type)

    if not flavor_id:
        # Record the action in the database: a missing flavor_id means
        # cold migration, otherwise resize
        self._record_action_start(context, instance,
                                  instance_actions.MIGRATE)
    else:
        self._record_action_start(context, instance,
                                  instance_actions.RESIZE)

    scheduler_hint = {'filter_properties': filter_properties}
    # Call the conductor API; the request is forwarded via conductor RPC
    # to the conductor manager
    self.compute_task_api.resize_instance(context, instance,
            extra_instance_updates, scheduler_hint=scheduler_hint,
            flavor=new_instance_type,
            reservations=quotas.reservations or [],
            clean_shutdown=clean_shutdown)
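
  In the quota step above, upsize_quota_delta() only charges quota for resources that grow, so a downsize or a pure cold migration yields an empty delta. Its logic is roughly the following (a simplified sketch, not the verbatim source):

def upsize_quota_delta(context, new_flavor, old_flavor):
    """Return the extra quota needed to move to new_flavor (sketch)."""
    # context is unused in this sketch
    def delta(resource):
        return new_flavor[resource] - old_flavor[resource]

    deltas = {}
    if delta('vcpus') > 0:
        deltas['cores'] = delta('vcpus')       # extra vCPUs to reserve
    if delta('memory_mb') > 0:
        deltas['ram'] = delta('memory_mb')     # extra RAM (MB) to reserve
    return deltas

  reserve_quota_delta() then reserves these deltas against the project's quota and raises OverQuota if there is not enough headroom, which produces the TooManyInstances error seen above.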

  Next, the request goes to nova/conductor/api.py.ComputeTaskAPI.resize_instance(), whose code is as follows:

def resize_instance(self, context, instance, extra_instance_updates,
                    scheduler_hint, flavor, reservations,
                    clean_shutdown=True):
    # NOTE(comstud): 'extra_instance_updates' is not used here but is
    # needed for compatibility with the cells_rpcapi version of this
    # method.
    self.conductor_compute_rpcapi.migrate_server(
        context, instance, scheduler_hint, live=False, rebuild=False,
        flavor=flavor, block_migration=None, disk_over_commit=None,
        reservations=reservations, clean_shutdown=clean_shutdown)

  This in turn calls nova/conductor/rpcapi.py.ComputeTaskAPI.migrate_server(). Note that cctxt.call() below is a synchronous RPC, so the API service blocks until nova-conductor replies:

def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
              flavor, block_migration, disk_over_commit,
              reservations=None, clean_shutdown=True):
    # live is passed in as False here, i.e. this is a cold migration
    kw = {'instance': instance, 'scheduler_hint': scheduler_hint,
          'live': live, 'rebuild': rebuild, 'flavor': flavor,
          'block_migration': block_migration,
          'disk_over_commit': disk_over_commit,
          'reservations': reservations,
          'clean_shutdown': clean_shutdown}
    version = '1.11'
    if not self.client.can_send_version(version):
        del kw['clean_shutdown']
        version = '1.10'
    if not self.client.can_send_version(version):
        kw['flavor'] = objects_base.obj_to_primitive(flavor)
        version = '1.6'
    if not self.client.can_send_version(version):
        kw['instance'] = jsonutils.to_primitive(
                objects_base.obj_to_primitive(instance))
        version = '1.4'
    cctxt = self.client.prepare(version=version)
    return cctxt.call(context, 'migrate_server', **kw)

  nova-conductor receives the request and, via the RPC dispatch mapping, hands it to nova/conductor/manager.py.ComputeTaskManager.migrate_server(). The code and comments are as follows:

@messaging.expected_exceptions(exception.NoValidHost,
                               exception.ComputeServiceUnavailable,
                               exception.InvalidHypervisorType,
                               exception.InvalidCPUInfo,
                               exception.UnableToMigrateToSelf,
                               exception.DestinationHypervisorTooOld,
                               exception.InvalidLocalStorage,
                               exception.InvalidSharedStorage,
                               exception.HypervisorUnavailable,
                               exception.InstanceInvalidState,
                               exception.MigrationPreCheckError,
                               exception.LiveMigrationWithOldNovaNotSafe,
                               exception.UnsupportedPolicyException)
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
        flavor, block_migration, disk_over_commit, reservations=None,
        clean_shutdown=True):
    if instance and not isinstance(instance, nova_object.NovaObject):
        # NOTE(danms): Until v2 of the RPC API, we need to tolerate
        # old-world instance objects here
        attrs = ['metadata', 'system_metadata', 'info_cache',
                 'security_groups']
        instance = objects.Instance._from_db_object(
            context, objects.Instance(), instance,
            expected_attrs=attrs)
    # NOTE: Remove this when we drop support for v1 of the RPC API
    if flavor and not isinstance(flavor, objects.Flavor):
        # Code downstream may expect extra_specs to be populated since it
        # is receiving an object, so lookup the flavor to ensure this.
        flavor = objects.Flavor.get_by_id(context, flavor['id'])
    if live and not rebuild and not flavor:
        self._live_migrate(context, instance, scheduler_hint,
                           block_migration, disk_over_commit)
    # Not a live migration, not a rebuild, and a flavor is supplied:
    # this is either a resize or a cold migration. From this point on
    # the two operations run the same code.
    elif not live and not rebuild and flavor:
        instance_uuid = instance.uuid
        with compute_utils.EventReporter(context, 'cold_migrate',
                                         instance_uuid):
            self._cold_migrate(context, instance, flavor,
                               scheduler_hint['filter_properties'],
                               reservations, clean_shutdown)
    else:
        raise NotImplementedError()

  Next comes _cold_migrate(), whose code and comments are as follows:

def _cold_migrate(self, context, instance, flavor, filter_properties,
                  reservations, clean_shutdown):
    image = utils.get_image_from_system_metadata(
        instance.system_metadata)

    request_spec = scheduler_utils.build_request_spec(
        context, image, [instance], instance_type=flavor)
    task = self._build_cold_migrate_task(context, instance, flavor,
                                         filter_properties, request_spec,
                                         reservations, clean_shutdown)
    try:
        task.execute()
    except exception.NoValidHost as ex:
        vm_state = instance.vm_state
        if not vm_state:
            vm_state = vm_states.ACTIVE
        updates = {'vm_state': vm_state, 'task_state': None}
        self._set_vm_state_and_notify(context, instance.uuid,
                                      'migrate_server',
                                      updates, ex, request_spec)

        # if the flavor IDs match, it's migrate; otherwise resize
        if flavor.id == instance.instance_type_id:
            msg = _("No valid host found for cold migrate")
        else:
            msg = _("No valid host found for resize")
        raise exception.NoValidHost(reason=msg)
    except exception.UnsupportedPolicyException as ex:
        with excutils.save_and_reraise_exception():
            vm_state = instance.vm_state
            if not vm_state:
                vm_state = vm_states.ACTIVE
            updates = {'vm_state': vm_state, 'task_state': None}
            self._set_vm_state_and_notify(context, instance.uuid,
                                          'migrate_server',
                                          updates, ex, request_spec)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            updates = {'vm_state': instance.vm_state,
                       'task_state': None}
            self._set_vm_state_and_notify(context, instance.uuid,
                                          'migrate_server',
                                          updates, ex, request_spec)
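
  In the code above, build_request_spec() assembles the legacy request_spec dict that is later handed to the scheduler. In this era of Nova its shape is roughly the following (a sketch; the exact key set is an assumption based on the surrounding code):

request_spec = {
    'image': image,                   # image meta rebuilt from system_metadata
    'instance_properties': instance,  # the instance as a primitive dict
    'instance_type': flavor,          # the target (or unchanged) flavor
    'num_instances': 1,               # a cold migration moves one instance
}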

  Next, _build_cold_migrate_task() is called; its code is as follows:

def _build_cold_migrate_task(self, context, instance, flavor,
                             filter_properties, request_spec, reservations,
                             clean_shutdown):
    return migrate.MigrationTask(context, instance, flavor,
                                 filter_properties, request_spec,
                                 reservations, clean_shutdown,
                                 self.compute_rpcapi,
                                 self.scheduler_client)
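
  MigrationTask derives from TaskBase in nova/conductor/tasks/base.py, so the task.execute() called in _cold_migrate() is a thin wrapper that delegates to the subclass's _execute() and rolls back on failure. The base class has roughly this shape (a sketch of the pattern, not the verbatim source):

from oslo_utils import excutils

class TaskBase(object):
    """Common wrapper for conductor tasks (sketch)."""
    def __init__(self, context, instance):
        self.context = context
        self.instance = instance

    def execute(self):
        try:
            return self._execute()
        except Exception:
            with excutils.save_and_reraise_exception():
                self.rollback()

    def _execute(self):
        raise NotImplementedError()

    def rollback(self):
        # MigrationTask overrides this to roll back the quota reservation.
        pass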

  Execution then moves on to nova/conductor/tasks/migrate.py.MigrationTask._execute(). The code and comments are as follows:

def _execute(self):
    image = self.request_spec.get('image')
    self.quotas = objects.Quotas.from_reservations(self.context,
                                                   self.reservations,
                                                   instance=self.instance)
    scheduler_utils.setup_instance_group(self.context, self.request_spec,
                                         self.filter_properties)
    scheduler_utils.populate_retry(self.filter_properties,
                                   self.instance.uuid)

    # Send a synchronous request to nova-scheduler to select a host for
    # the migration. The call chain is:
    # SchedulerClient -> SchedulerQueryClient -> SchedulerAPI
    hosts = self.scheduler_client.select_destinations(
        self.context, self.request_spec, self.filter_properties)
    # Pick the first host from the returned list (the one with the
    # highest weight)
    host_state = hosts[0]
    scheduler_utils.populate_filter_properties(self.filter_properties,
                                               host_state)
    # context is not serializable
    self.filter_properties.pop('context', None)

    (host, node) = (host_state['host'], host_state['nodename'])
    self.compute_rpcapi.prep_resize(
        self.context, image, self.instance, self.flavor, host,
        self.reservations, request_spec=self.request_spec,
        filter_properties=self.filter_properties, node=node,
        clean_shutdown=self.clean_shutdown)
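
  For a single cold migration, the list returned by select_destinations() contains one primitive dict per requested instance, which is why _execute() simply takes hosts[0]. Illustratively (hostnames and limit values below are made-up examples):

# Illustrative return value of select_destinations() for one instance:
hosts = [{'host': 'compute-02',
          'nodename': 'compute-02',
          'limits': {'memory_mb': 65536, 'disk_gb': 1024}}]
host_state = hosts[0]
host, node = host_state['host'], host_state['nodename']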

  Next, nova/scheduler/rpcapi.py.select_destinations() is called; its code is as follows:

def select_destinations(self, ctxt, request_spec, filter_properties):
    cctxt = self.client.prepare(version='4.0')
    return cctxt.call(ctxt, 'select_destinations',
        request_spec=request_spec, filter_properties=filter_properties)

  On the scheduler side this is handled by nova/scheduler/manager.py.SchedulerManager.select_destinations(). The code and comments are as follows:

def select_destinations(self, context, request_spec, filter_properties):
    """Returns destinations(s) best suited for this request_spec and
    filter_properties.

    The result should be a list of dicts with 'host', 'nodename' and
    'limits' as keys.
    """
    dests = self.driver.select_destinations(context, request_spec,
        filter_properties)
    return jsonutils.to_primitive(dests)

  Note the scheduler driver in play here: it is set by the scheduler_driver option in nova.conf and defaults to nova.scheduler.filter_scheduler.FilterScheduler.
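
  An illustrative nova.conf entry (example only):

[DEFAULT]
scheduler_driver = nova.scheduler.filter_scheduler.FilterScheduler

  With the default driver, the call therefore lands in nova/scheduler/filter_scheduler.py.select_destinations(), whose code and comments are as follows: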

def select_destinations(self, context, request_spec, filter_properties):
    """Selects a filtered set of hosts and nodes."""
    # TODO(sbauza): Change the select_destinations method to accept a
    # RequestSpec object directly (and add a new RPC API method for passing
    # a RequestSpec object over the wire)
    spec_obj = objects.RequestSpec.from_primitives(context,
                                                   request_spec,
                                                   filter_properties)
    self.notifier.info(
        context, 'scheduler.select_destinations.start',
        dict(request_spec=spec_obj.to_legacy_request_spec_dict()))

    num_instances = spec_obj.num_instances
    selected_hosts = self._schedule(context, spec_obj)

    # Couldn't fulfill the request_spec
    if len(selected_hosts) < num_instances:
        # NOTE(Rui Chen): If multiple creates failed, set the updated time
        # of selected HostState to None so that these HostStates are
        # refreshed according to database in next schedule, and release
        # the resource consumed by instance in the process of selecting
        # host.
        for host in selected_hosts:
            host.obj.updated = None

        # Log the details but don't put those into the reason since
        # we don't want to give away too much information about our
        # actual environment.
        LOG.debug('There are %(hosts)d hosts available but '
                  '%(num_instances)d instances requested to build.',
                  {'hosts': len(selected_hosts),
                   'num_instances': num_instances})

        reason = _('There are not enough hosts available.')
        raise exception.NoValidHost(reason=reason)

    dests = [dict(host=host.obj.host, nodename=host.obj.nodename,
                  limits=host.obj.limits) for host in selected_hosts]

    self.notifier.info(
        context, 'scheduler.select_destinations.end',
        dict(request_spec=spec_obj.to_legacy_request_spec_dict()))
    return dests

The _schedule() method is as follows:

def _schedule(self, context, spec_obj):
    """Returns a list of hosts that meet the required specs,
    ordered by their fitness.
    """
    elevated = context.elevated()
    # Load the filtering options from nova.conf. A user can point the
    # scheduler_json_config_location option in nova.conf at a JSON file
    # containing extra filter parameters.
    config_options = self._get_configuration_options()

    # Find our local list of acceptable hosts by repeatedly
    # filtering and weighing our options. Each time we choose a
    # host, we virtually consume resources on it so subsequent
    # selections can adjust accordingly.

    # Note: remember, we are using an iterator here. So only
    # traverse this list once. This can bite you if the hosts
    # are being scanned in a filter or weighing function.

    # Get the state of all active hosts
    hosts = self._get_all_host_states(elevated)

    selected_hosts = []
    num_instances = spec_obj.num_instances
    # TODO(sbauza): Modify the interfaces for HostManager and filters to
    # accept the RequestSpec object directly (in a later patch hopefully)
    filter_properties = spec_obj.to_legacy_filter_properties_dict()
    # NOTE(sbauza): Adding temporarly some keys since filters are
    # directly using it - until we provide directly RequestSpec
    filter_properties.update(
        {'request_spec': spec_obj.to_legacy_request_spec_dict(),
         'instance_type': spec_obj.flavor})
    # TODO(sbauza): Adding two keys not used in-tree but which will be
    # provided as non-fields for the RequestSpec once we provide it to the
    # filters
    # Update the filter properties
    filter_properties.update({'context': context,
                              'config_options': config_options})
    for num in range(num_instances):
        # Filter local hosts based on requirements ...
        # Return the hosts that pass all filters. The filters in use are
        # set by the scheduler_default_filters option in nova.conf; the
        # filter implementations live under nova/scheduler/filters.
        hosts = self.host_manager.get_filtered_hosts(hosts,
                filter_properties, index=num)
        if not hosts:
            # Can't get any more locally.
            break

        LOG.debug("Filtered %(hosts)s", {'hosts': hosts})
        # Weigh the filtered hosts and return them sorted by weight in
        # descending order. The weighers are set by the
        # scheduler_weight_classes option in nova.conf; their code lives
        # under nova/scheduler/weights.
        weighed_hosts = self.host_manager.get_weighed_hosts(hosts,
                filter_properties)

        LOG.debug("Weighed %(hosts)s", {'hosts': weighed_hosts})

        scheduler_host_subset_size = CONF.scheduler_host_subset_size
        if scheduler_host_subset_size > len(weighed_hosts):
            scheduler_host_subset_size = len(weighed_hosts)
        if scheduler_host_subset_size < 1:
            scheduler_host_subset_size = 1
        # Pick a host at random from the best scheduler_host_subset_size
        # candidates; with the default subset size of 1 this always
        # selects the top-weighted host.
        chosen_host = random.choice(
            weighed_hosts[0:scheduler_host_subset_size])
        LOG.debug("Selected host: %(host)s", {'host': chosen_host})
        selected_hosts.append(chosen_host)

        # Now consume the resources so the filter/weights
        # will change for the next instance.
        chosen_host.obj.consume_from_request(spec_obj)
        if filter_properties.get('group_updated') is True:
            filter_properties['group_hosts'].add(chosen_host.obj.host)
    return selected_hosts
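
  All three knobs mentioned in the comments above live in nova.conf. An illustrative configuration (the values are examples, not recommendations):

[DEFAULT]
scheduler_default_filters = RetryFilter,AvailabilityZoneFilter,RamFilter,ComputeFilter,ImagePropertiesFilter
scheduler_weight_classes = nova.scheduler.weights.all_weighers
scheduler_host_subset_size = 1

  With scheduler_host_subset_size = 1 the scheduler deterministically picks the highest-weighted host; raising it makes the random.choice() above meaningful and reduces contention when several scheduler workers run in parallel.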

  Back in _execute(), the last step is compute_rpcapi.prep_resize(), i.e. nova/compute/rpcapi.py.ComputeAPI.prep_resize(), whose code is as follows:

def prep_resize(self, ctxt, image, instance, instance_type, host,
                reservations=None, request_spec=None,
                filter_properties=None, node=None,
                clean_shutdown=True):
    image_p = jsonutils.to_primitive(image)
    msg_args = {'instance': instance,
                'instance_type': instance_type,
                'image': image_p,
                'reservations': reservations,
                'request_spec': request_spec,
                'filter_properties': filter_properties,
                'node': node,
                'clean_shutdown': clean_shutdown}
    version = '4.1'
    if not self.client.can_send_version(version):
        version = '4.0'
        msg_args['instance_type'] = objects_base.obj_to_primitive(
                                        instance_type)
    cctxt = self.client.prepare(server=host, version=version)
    cctxt.cast(ctxt, 'prep_resize', **msg_args)

  Note that prep_resize is sent with cast(), an asynchronous one-way message routed to the chosen destination host via server=host. It is handled by nova/compute/manager.py.ComputeManager.prep_resize() on the destination compute node. The code and comments are as follows:

@wrap_exception()
@reverts_task_state
@wrap_instance_event
@wrap_instance_fault
def prep_resize(self, context, image, instance, instance_type,
                reservations, request_spec, filter_properties, node,
                clean_shutdown):
    """Initiates the process of moving a running instance to another host.

    Possibly changes the RAM and disk size in the process.

    """
    if node is None:
        # If the scheduler did not pass a node, pick one from the
        # driver's available nodes
        node = self.driver.get_available_nodes(refresh=True)[0]
        LOG.debug("No node specified, defaulting to %s", node,
                  instance=instance)

    # NOTE(melwitt): Remove this in version 5.0 of the RPC API
    # Code downstream may expect extra_specs to be populated since it
    # is receiving an object, so lookup the flavor to ensure this.
    if not isinstance(instance_type, objects.Flavor):
        instance_type = objects.Flavor.get_by_id(context,
                                                 instance_type['id'])

    quotas = objects.Quotas.from_reservations(context,
                                              reservations,
                                              instance=instance)
    with self._error_out_instance_on_exception(context, instance,
                                               quotas=quotas):
        compute_utils.notify_usage_exists(self.notifier, context, instance,
                                          current_period=True)
        self._notify_about_instance_usage(
                context, instance, "resize.prep.start")
        try:
            self._prep_resize(context, image, instance,
                              instance_type, quotas,
                              request_spec, filter_properties,
                              node, clean_shutdown)
        # NOTE(dgenin): This is thrown in LibvirtDriver when the
        #               instance to be migrated is backed by LVM.
        #               Remove when LVM migration is implemented.
        except exception.MigrationPreCheckError:
            raise
        except Exception:
            # try to re-schedule the resize elsewhere:
            exc_info = sys.exc_info()
            self._reschedule_resize_or_reraise(context, image, instance,
                    exc_info, instance_type, quotas, request_spec,
                    filter_properties)
        finally:
            extra_usage_info = dict(
                    new_instance_type=instance_type.name,
                    new_instance_type_id=instance_type.id)

            self._notify_about_instance_usage(
                context, instance, "resize.prep.end",
                extra_usage_info=extra_usage_info)
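
  The _error_out_instance_on_exception() context manager wrapped around the body above is what guarantees cleanup when preparation fails for good: it rolls back the quota reservation and puts the instance into ERROR state before re-raising. Its logic is roughly the following (a simplified sketch that ignores the InstanceFaultRollback special case, which restores the previous vm_state instead):

# Sketch of the context manager (a ComputeManager method); requires
# contextlib and oslo_utils.excutils.
@contextlib.contextmanager
def _error_out_instance_on_exception(self, context, instance, quotas=None):
    try:
        yield
    except Exception:
        with excutils.save_and_reraise_exception():
            if quotas:
                quotas.rollback()   # release the reserved quota delta
            self._set_instance_error_state(context, instance)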

  The remainder of the flow will be covered in the next part.
