openstack-nova-虚拟机创建流程以及源码分析(一)

openstack 虚机创建流程以及源码分析(一)

基于openstack stein
虚机创建流程图

上图是虚拟机创建的整体流程,可以看到虚拟机创建请求依次经过了API、Conductor、Scheduler、Placement、Compute等主要服务。

下表列出了每个步骤中实例的状态:

Status Task Power state Steps
Build scheduling None 3-12
Build networking None 22-24
Build block_device_mapping None 25-27
Build spawning None 28
Active None Running

首先对从HTTP请求中获取的req和body的相关信息进行验证,验证完成后调用nova-api的create函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#nova/api/openstack/compute/servers.py #ServersController
def create(self, req, body):
......................
self._process_bdms_for_create(
context, target, server_dict, create_kwargs,
supports_device_tagging)

image_uuid = self._image_from_req_data(server_dict, create_kwargs)

self._process_networks_for_create(
context, target, server_dict, create_kwargs,
supports_device_tagging)

flavor_id = self._flavor_id_from_req_data(body)
try:
inst_type = flavors.get_flavor_by_flavor_id(
flavor_id, ctxt=context, read_deleted="no")

supports_multiattach = common.supports_multiattach_volume(req)
supports_port_resource_request = \
common.supports_port_resource_request(req)
(instances, resv_id) = self.compute_api.create(context,
inst_type,
image_uuid,
display_name=name,
display_description=description,
availability_zone=availability_zone,
forced_host=host, forced_node=node,
metadata=server_dict.get('metadata', {}),
admin_password=password,
check_server_group_quota=True,
supports_multiattach=supports_multiattach,
supports_port_resource_request=supports_port_resource_request,
**create_kwargs)
except (exception.QuotaError,
exception.PortLimitExceeded) as error:
raise exc.HTTPForbidden(
explanation=error.format_message())


其中self.compute_api有两种类型,具体使用哪一种由配置参数决定,如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#nova/compute/__init__.py
# Maps the configured cell type to the compute API implementation that
# should back this nova-api process.
CELL_TYPE_TO_CLS_NAME = {
    'api': 'nova.compute.cells_api.ComputeCellsAPI',
    'compute': 'nova.compute.api.API',
    None: 'nova.compute.api.API',
}


def _get_compute_api_class_name():
    """Return the dotted path of the compute API class to instantiate."""
    return CELL_TYPE_TO_CLS_NAME[nova.cells.opts.get_cell_type()]


def API(*args, **kwargs):
    """Factory: import and build the compute API object for this cell type."""
    return importutils.import_object(
        _get_compute_api_class_name(), *args, **kwargs)

即根据/etc/nova/nova.conf配置文件[cells]段下面的enable参数来设置self.compute_api的类型。注意该enable参数属于cells v1机制,与stein版本默认启用的cell v2是两套不同的机制:当enable=true时,get_cell_type函数返回'api',self.compute_api为nova.compute.cells_api.ComputeCellsAPI对象,将调用其create函数;当采用默认值enable=false时,self.compute_api则为nova.compute.api.API对象。下文以ComputeCellsAPI路径为例继续分析。

1
2
3
4
5
6
7
8
#nova/compute/cells_api.py
class ComputeCellsAPI(compute_api.API):
    def create(self, *args, **kwargs):
        """Delegate instance creation entirely to the base compute API;
        kept only so the override point is explicit.
        """
        return super(ComputeCellsAPI, self).create(*args, **kwargs)

nova.compute.cells_api.ComputeCellsAPI中的create函数没有自己的逻辑,直接调用父类nova.compute.api.API中的create函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#nova/compute/api.py
def create(self, context, instance_type,
           image_href, kernel_id=None, ramdisk_id=None,
           min_count=None, max_count=None,
           display_name=None, display_description=None,
           key_name=None, key_data=None, security_groups=None,
           availability_zone=None, forced_host=None, forced_node=None,
           user_data=None, metadata=None, injected_files=None,
           admin_password=None, block_device_mapping=None,
           access_ip_v4=None, access_ip_v6=None, requested_networks=None,
           config_drive=None, auto_disk_config=None, scheduler_hints=None,
           legacy_bdm=True, shutdown_terminate=False,
           check_server_group_quota=False, tags=None,
           supports_multiattach=False, trusted_certs=None,
           supports_port_resource_request=False):
    """Provision instances, sending instance information to the
    scheduler.  The scheduler decides where the instance(s) go and
    handles creating the DB entries.

    Returns a tuple of (instances, reservation_id).
    """
    # Asking for a specific IP (or specific neutron ports) only makes
    # sense when a single instance is being booted.
    if requested_networks and max_count is not None and max_count > 1:
        self._check_multiple_instances_with_specified_ip(
            requested_networks)
        if utils.is_neutron():
            self._check_multiple_instances_with_neutron_ports(
                requested_networks)

    # Reject an explicitly requested AZ that does not exist — unless a
    # host was force-targeted, in which case the AZ check is skipped.
    if availability_zone:
        available_zones = availability_zones.get_availability_zones(
            context.elevated(), True)
        if forced_host is None and availability_zone not in available_zones:
            raise exception.InvalidRequest(
                _('The requested availability zone is not available'))

    # Fold scheduler hints and any forced host/node into the filter
    # properties consumed by the scheduler.
    filter_properties = scheduler_utils.build_filter_properties(
        scheduler_hints, forced_host, forced_node, instance_type)

    return self._create_instance(
        context, instance_type,
        image_href, kernel_id, ramdisk_id,
        min_count, max_count,
        display_name, display_description,
        key_name, key_data, security_groups,
        availability_zone, user_data, metadata,
        injected_files, admin_password,
        access_ip_v4, access_ip_v6,
        requested_networks, config_drive,
        block_device_mapping, auto_disk_config,
        filter_properties=filter_properties,
        legacy_bdm=legacy_bdm,
        shutdown_terminate=shutdown_terminate,
        check_server_group_quota=check_server_group_quota,
        tags=tags, supports_multiattach=supports_multiattach,
        trusted_certs=trusted_certs,
        supports_port_resource_request=supports_port_resource_request)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#nova/compute/api.py
def _create_instance(self, context, instance_type,
image_href, kernel_id, ramdisk_id,
min_count, max_count,
display_name, display_description,
key_name, key_data, security_groups,
availability_zone, user_data, metadata, injected_files,
admin_password, access_ip_v4, access_ip_v6,
requested_networks, config_drive,
block_device_mapping, auto_disk_config, filter_properties,
reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
check_server_group_quota=False, tags=None,
supports_multiattach=False, trusted_certs=None,
supports_port_resource_request=False):
..............
self._checks_for_create_and_rebuild(context, image_id, boot_meta,
instance_type, metadata, injected_files,
block_device_mapping.root_bdm(), validate_numa=False)

instance_group = self._get_requested_instance_group(context,
filter_properties)

tags = self._create_tag_list_obj(context, tags)

instances_to_build = self._provision_instances(
context, instance_type, min_count, max_count, base_options,
boot_meta, security_groups, block_device_mapping,
shutdown_terminate, instance_group, check_server_group_quota,
filter_properties, key_pair, tags, trusted_certs,
supports_multiattach, network_metadata)
if CONF.cells.enable:
# NOTE(danms): CellsV1 can't do the new thing, so we
# do the old thing here. We can remove this path once
# we stop supporting v1.
for instance in instances:
instance.create()
# NOTE(melwitt): We recheck the quota after creating the objects
# to prevent users from allocating more resources than their
# allowed quota in the event of a race. This is configurable
# because it can be expensive if strict quota limits are not
# required in a deployment.
if CONF.quota.recheck_quota:
try:
compute_utils.check_num_instances_quota(
context, instance_type, 0, 0,
orig_num_req=len(instances))
except exception.TooManyInstances:
with excutils.save_and_reraise_exception():
# Need to clean up all the instances we created
# along with the build requests, request specs,
# and instance mappings.
self._cleanup_build_artifacts(instances,
instances_to_build)

self.compute_task_api.build_instances(context,
instances=instances, image=boot_meta,
filter_properties=filter_properties,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping,
legacy_bdm=False)

这里调用 _create_instance 创建虚拟机:nova-api 在数据库中为新实例创建相应的数据库条目,最终调用 compute_task_api.build_instances 发起创建请求。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#nova/conductor/api.py
def build_instances(self, context, instances, image, filter_properties,
                    admin_password, injected_files, requested_networks,
                    security_groups, block_device_mapping, legacy_bdm=True,
                    request_spec=None, host_lists=None):
    """Thin pass-through: forward every argument unchanged to the
    conductor RPC API, which casts 'build_instances' to nova-conductor.
    """
    rpcapi = self.conductor_compute_rpcapi
    rpcapi.build_instances(
        context,
        instances=instances,
        image=image,
        filter_properties=filter_properties,
        admin_password=admin_password,
        injected_files=injected_files,
        requested_networks=requested_networks,
        security_groups=security_groups,
        block_device_mapping=block_device_mapping,
        legacy_bdm=legacy_bdm,
        request_spec=request_spec,
        host_lists=host_lists)

接着通过 conductor 的 rpcapi 接口把请求以 RPC 消息的形式传给 nova-conductor,由它来构建(build)实例。

1
2
3
4
5
6
7
#nova/api/openstack/server.py
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping, legacy_bdm=True,
request_spec=None, host_lists=None):
..........
cctxt.cast(context, 'build_instances', **kwargs)

为什么通过RPC的cast调用就会调用到该位置的build_instances函数呢?这是因为在nova-conductor服务启动时,会去创建相关的RPC-server,而这些RPC-server创建时候将会去指定一些endpoints(与keystone中的endpoints含义不同,这里只是名称相同而已),而这些endpoints中包括一些对象列表,当RPC-client去调用相应的RPC-server中的函数时,则会在这些endpoints的对象列表中进行查找,然后调用相应的函数,具体可以看下 #/nova/service.py:Service start 流程, 更加详细的可以查询openstack rpc相关。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#nova/conductor/manager.py
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping=None, legacy_bdm=True,
request_spec=None, host_lists=None):

try:
# check retry policy. Rather ugly use of instances[0]...
# but if we've exceeded max retries... then we really only
# have a single instance.
# TODO(sbauza): Provide directly the RequestSpec object
# when populate_retry() accepts it
scheduler_utils.populate_retry(
filter_properties, instances[0].uuid)
instance_uuids = [instance.uuid for instance in instances]
spec_obj = objects.RequestSpec.from_primitives(
context, legacy_request_spec, filter_properties)
LOG.debug("Rescheduling: %s", is_reschedule)
if is_reschedule:
# Make sure that we have a host, as we may have exhausted all
# our alternates
if not host_lists[0]:
# We have an empty list of hosts, so this instance has
# failed to build.
msg = ("Exhausted all hosts available for retrying build "
"failures for instance %(instance_uuid)s." %
{"instance_uuid": instances[0].uuid})
raise exception.MaxRetriesExceeded(reason=msg)
else:
# This is not a reschedule, so we need to call the scheduler to
# get appropriate hosts for the request.
# NOTE(gibi): We only call the scheduler if using cells v1 or
# we are rescheduling from a really old compute. In
# either case we do not support externally-defined resource
# requests, like port QoS. So no requested_resources are set
# on the RequestSpec here.
host_lists = self._schedule_instances(context, spec_obj,
instance_uuids, return_alternates=True)

首次创建时不走重调度分支,而是调用 _schedule_instances,其内部通过 self.query_client.select_destinations 选择计算节点,即由 conductor 通过 scheduler client 发起 RPC 请求。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#nova/scheduler/client/query.py
class SchedulerQueryClient(object):
    """Client used to query the scheduler service over RPC."""

    def __init__(self):
        self.scheduler_rpcapi = scheduler_rpcapi.SchedulerAPI()

    def select_destinations(self, context, spec_obj, instance_uuids,
                            return_objects=False, return_alternates=False):
        """Ask the scheduler for the destination(s) best suited to this
        request_spec and filter_properties.

        With return_objects=False the result is the legacy list of dicts
        keyed by 'host', 'nodename' and 'limits'; return_alternates is
        then ignored.  With return_objects=True the result is a list of
        lists of Selection objects, one inner list per instance: the
        first entry is the selected (claimed) host and, when
        return_alternates is True, the remaining entries are alternate
        hosts (bounded by CONF.scheduler.max_attempts).
        """
        return self.scheduler_rpcapi.select_destinations(
            context, spec_obj, instance_uuids, return_objects,
            return_alternates)
1
2
3
4
5
6
7
#nova/scheduler/rpcapi.py
def select_destinations(self, ctxt, spec_obj, instance_uuids,
return_objects=False, return_alternates=False):
# Modify the parameters if an older version is requested
.................
return cctxt.call(ctxt, 'select_destinations', **msg_args)

该消息通过 RPC 同步调用(call)发送给 nova-scheduler,并等待其返回调度结果。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#nova/scheduler/manager.py
def select_destinations(self, ctxt, request_spec=None,
filter_properties=None, spec_obj=_sentinel, instance_uuids=None,
return_objects=False, return_alternates=False):
.........................
if self.driver.USES_ALLOCATION_CANDIDATES and not is_rebuild:
# Only process the Placement request spec filters when Placement
# is used.
try:
request_filter.process_reqspec(ctxt, spec_obj)
except exception.RequestFilterFailed as e:
raise exception.NoValidHost(reason=e.message)

resources = utils.resources_from_request_spec(spec_obj)
res = self.placement_client.get_allocation_candidates(ctxt,
resources)
if res is None:
# We have to handle the case that we failed to connect to the
# Placement service and the safe_connect decorator on
# get_allocation_candidates returns None.
alloc_reqs, provider_summaries, allocation_request_version = (
None, None, None)
else:
(alloc_reqs, provider_summaries,
allocation_request_version) = res
if not alloc_reqs:
LOG.info("Got no allocation candidates from the Placement "
"API. This could be due to insufficient resources "
"or a temporary occurrence as compute nodes start "
"up.")
raise exception.NoValidHost(reason="")
else:
# Build a dict of lists of allocation requests, keyed by
# provider UUID, so that when we attempt to claim resources for
# a host, we can grab an allocation request easily
alloc_reqs_by_rp_uuid = collections.defaultdict(list)
for ar in alloc_reqs:
for rp_uuid in ar['allocations']:
alloc_reqs_by_rp_uuid[rp_uuid].append(ar)

# Only return alternates if both return_objects and return_alternates
# are True.
return_alternates = return_alternates and return_objects
selections = self.driver.select_destinations(ctxt, spec_obj,
instance_uuids, alloc_reqs_by_rp_uuid, provider_summaries,
allocation_request_version, return_alternates)
# If `return_objects` is False, we need to convert the selections to
# the older format, which is a list of host state dicts.
if not return_objects:
selection_dicts = [sel[0].to_dict() for sel in selections]
return jsonutils.to_primitive(selection_dicts)
return selections

scheduler 选择目标节点时,先查询 Placement 获取合适的候选节点(allocation candidates),再通过 self.driver.select_destinations 调用自身的 filter/weigher 机制最终选定节点。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#nova/scheduler/filter_scheduler.py
def select_destinations(self, context, spec_obj, instance_uuids,
alloc_reqs_by_rp_uuid, provider_summaries,
allocation_request_version=None, return_alternates=False):
.....................
host_selections = self._schedule(context, spec_obj, instance_uuids,
alloc_reqs_by_rp_uuid, provider_summaries,
allocation_request_version, return_alternates)
self.notifier.info(
context, 'scheduler.select_destinations.end',
dict(request_spec=spec_obj.to_legacy_request_spec_dict()))
compute_utils.notify_about_scheduler_action(
context=context, request_spec=spec_obj,
action=fields_obj.NotificationAction.SELECT_DESTINATIONS,
phase=fields_obj.NotificationPhase.END)
return host_selections
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def _schedule(self, context, spec_obj, instance_uuids,
alloc_reqs_by_rp_uuid, provider_summaries,
allocation_request_version=None, return_alternates=False):
.......
hosts = self._get_all_host_states(elevated, spec_obj,
provider_summaries)
.........
for num, instance_uuid in enumerate(instance_uuids):
hosts = self._get_sorted_hosts(spec_obj, hosts, num)

.........
# Check if we were able to fulfill the request. If not, this call will
# raise a NoValidHost exception.
self._ensure_sufficient_hosts(context, claimed_hosts, num_instances,
claimed_instance_uuids)

_schedule 先获取前面 Placement 返回的候选 resource provider 对应的主机状态信息,再对候选主机逐一过滤、排序并认领资源,至此 select_destinations 完成计算节点的选择。