您的位置:首页 > 编程语言

OpenStack 启动云主机代码执行过程

2014-11-05 22:12 309 查看
先说下哈,本人是OpenStack小白一枚,才开始学习,有太多东西都不懂,源码更是刚开始看,并且到现在为止没找到好的学习源码的方法,如有路过的朋友,麻烦在评论里指点下有什么学习OpenStack源码的好方法,比如文档啊、官方的也行,我连官方文档都没怎么看过,目前看代码还停留在死磕的层面

,求指点啊,先行跪谢啊。

环境:OpenStack-Icehouse, 一台控制节点(nova,
glance, keystone, neutron(rabbitmq)),一台计算节点

在控制节点上,按照http://docs.openstack.org/developer/horizon/quickstart.html 搭了个horizon
developer

基本上可以说是最基本的配置,能启个虚拟机神马的,还是求指点啊。。

然后就开始了苦逼的看代码“旅程”,下面的过程可能有不完整的,因为我也只是顺了一遍代码流程,还有各种不懂,求海涵&指点啊。。

正式开始:

我是从 openstack_dashboard/dashboards/project/instances/workflows/create_instance.py开始看的,先说明下这个文件是负责web端启动云主机的,还有另外一个openstack_dashboard/dashboards/project/databases/workflows/create_instance.py,它位于 databases 面板下,应该是负责启动数据库实例的(我最初以为是管命令行启动的,其实不是)

注意:1-16步是在控制节点,17-18步是在计算节点

1. openstack_dashboard/dashboards/project/instances/workflows/create_instance.py

class LaunchInstance(workflows.Workflow):
    # Horizon workflow named "Launch Instance". Only the class attributes
    # are quoted in this excerpt; handle() is shown separately.
    slug = "launch_instance"
    name = _("Launch Instance")
    finalize_button_name = _("Launch")
    success_message = _('Launched %(count)s named "%(name)s".')
    failure_message = _('Unable to launch %(count)s named "%(name)s".')
    success_url = "horizon:project:instances:index"
    # Steps initialised after "Launch Instance" is clicked.
    default_steps = (SelectProjectUser,
                     # "Details" step; per the article its action is
                     # SetInstanceDetailsAction.
                     SetInstanceDetails,
                     SetAccessControls,
                     SetNetwork,
                     PostCreationStep,
                     SetAdvanced)


-> 执行LaunchInstance的handle函数

@sensitive_variables('context')
def handle(self, request, context):
    # Sends the collected workflow context to api.nova.server_create().
    # Returns True on success; on any exception it delegates to
    # exceptions.handle(request) and returns False.
    # (Intermediate code omitted by the article: image_id, custom_script,
    # dev_mapping_1, dev_mapping_2, nics and avail_zone come from there.)
    try:
        api.nova.server_create(request,
                               context['name'],
                               image_id,
                               context['flavor'],
                               context['keypair_id'],
                               normalize_newlines(custom_script),
                               context['security_group_ids'],
                               block_device_mapping=dev_mapping_1,
                               block_device_mapping_v2=dev_mapping_2,
                               nics=nics,
                               availability_zone=avail_zone,
                               instance_count=int(context['count']),
                               admin_pass=context['admin_pass'],
                               disk_config=context['disk_config'],
                               product_line=context['product_line'])  # API call: creation starts here
        return True
    except Exception:
        exceptions.handle(request)
        return False

2. api.nova.server_create()函数将走向->openstack_dashboard/api/nova.py

def server_create(request, name, image, flavor, key_name, user_data,
                  security_groups, block_device_mapping=None,
                  block_device_mapping_v2=None, nics=None,
                  availability_zone=None, instance_count=1, admin_pass=None,
                  disk_config=None, product_line=None):
    # Forwards the arguments to novaclient's servers.create() and wraps
    # the result in a Server object. Note that instance_count is passed
    # on as min_count and user_data as userdata.
    #
    # novaclient(request) returns a Client object (novaclient/v1_1/client.py).
    # novaclient(request).servers is the ServerManager, so
    # novaclient(request).servers.create() is ServerManager.create.
    return Server(novaclient(request).servers.create(
        name, image, flavor, userdata=user_data,
        security_groups=security_groups,
        key_name=key_name, block_device_mapping=block_device_mapping,
        block_device_mapping_v2=block_device_mapping_v2,
        nics=nics, availability_zone=availability_zone,
        min_count=instance_count, admin_pass=admin_pass,
        disk_config=disk_config, product_line=product_line), request)  # jump into novaclient


3. 跳转至-> novaclient/v1_1/servers.py -> ServerManager.create函数

def create(self, name, image, flavor, meta=None, files=None,
           reservation_id=None, min_count=None,
           max_count=None, security_groups=None, userdata=None,
           key_name=None, availability_zone=None,
           block_device_mapping=None, block_device_mapping_v2=None,
           nics=None, scheduler_hints=None,
           config_drive=None, disk_config=None, **kwargs):
    # Normalises the instance counts, gathers the boot keyword arguments,
    # picks the resource URL and delegates to self._boot().

    # A falsy min_count defaults to 1; a falsy max_count defaults to
    # min_count; min_count is then clamped so it never exceeds max_count.
    if not min_count:
        min_count = 1
    if not max_count:
        max_count = min_count
    if min_count > max_count:
        min_count = max_count

    boot_args = [name, image, flavor]

    boot_kwargs = dict(
        meta=meta, files=files, userdata=userdata,
        reservation_id=reservation_id, min_count=min_count,
        max_count=max_count, security_groups=security_groups,
        key_name=key_name, availability_zone=availability_zone,
        scheduler_hints=scheduler_hints, config_drive=config_drive,
        disk_config=disk_config, **kwargs)

    # Either block-device-mapping variant switches the request to
    # /os-volumes_boot; without one the plain /servers URL is used.
    if block_device_mapping:
        resource_url = "/os-volumes_boot"
        boot_kwargs['block_device_mapping'] = block_device_mapping
    elif block_device_mapping_v2:
        resource_url = "/os-volumes_boot"
        boot_kwargs['block_device_mapping_v2'] = block_device_mapping_v2
    else:
        resource_url = "/servers"
    if nics:
        boot_kwargs['nics'] = nics

    response_key = "server"
    return self._boot(resource_url, response_key, *boot_args,
                      **boot_kwargs)  # jump to ServerManager._boot


仍在 novaclient/v1_1/servers.py

def _boot(self, resource_url, response_key, name, image, flavor,
          meta=None, files=None, userdata=None,
          reservation_id=None, return_raw=False, min_count=None,
          max_count=None, security_groups=None, key_name=None,
          availability_zone=None, block_device_mapping=None,
          block_device_mapping_v2=None, nics=None, scheduler_hints=None,
          config_drive=None, admin_pass=None, disk_config=None, **kwargs):

    # The preceding setup work is omitted by the article (presumably this
    # is where `body` is built -- it is not defined in the excerpt).
    return self._create(resource_url, body, response_key,
                        return_raw=return_raw, **kwargs)  # jump to the base class Manager

4. 跳转至novaclient/base.py Manager._create()函数

def _create(self, url, body, response_key, return_raw=False, **kwargs):
    # Runs the 'modify_body_for_create' hooks, POSTs `body` to `url` and
    # returns either the raw body[response_key] (return_raw=True) or that
    # entry wrapped in self.resource_class.
    self.run_hooks('modify_body_for_create', body, **kwargs)
    _resp, body = self.api.client.post(url, body=body)
    # The POST is issued through the API client here. Per the article,
    # self.client is novaclient.v1_1.client.Client (presumably self.api is
    # meant -- TODO confirm) and self.api.client is
    # novaclient.client.HTTPClient (novaclient/client.py).
    if return_raw:
        return body[response_key]
    with self.completion_cache('human_id', self.resource_class, mode="a"):
        with self.completion_cache('uuid', self.resource_class, mode="a"):
            return self.resource_class(self, body[response_key])
接上面注释,HTTPClient的post方法,novaclient/client.py

def post(self, url, **kwargs):
    # Convenience wrapper: issues the request with the 'POST' method via
    # self._cs_request().
    return self._cs_request(url, 'POST', **kwargs)  # next jump
不粘代码了,太多了,一个文件里面的调用就直接写下吧~好吧,我只是懒


HTTPClient.post -> HTTPClient._cs_request -> HTTPClient._time_request -> HTTPClient.request

def request(self, url, method, **kwargs):
    # Performs one HTTP request and returns (resp, body), where body is
    # the JSON-decoded response text or None. Raises
    # exceptions.ConnectionRefused for a 400 whose text reports a refused
    # connection, and exceptions.from_response(...) for any status >= 400.
    kwargs.setdefault('headers', kwargs.get('headers', {}))
    kwargs['headers']['User-Agent'] = self.USER_AGENT
    kwargs['headers']['Accept'] = 'application/json'
    # A 'body' keyword is serialised to JSON and sent as 'data'.
    if 'body' in kwargs:
        kwargs['headers']['Content-Type'] = 'application/json'
        kwargs['data'] = json.dumps(kwargs['body'])
        del kwargs['body']
    if self.timeout is not None:
        kwargs.setdefault('timeout', self.timeout)
    kwargs['verify'] = self.verify_cert

    self.http_log_req(method, url, kwargs)

    # Use the session's request method when _get_session() returns one,
    # otherwise the module-level requests.request.
    request_func = requests.request
    session = self._get_session(url)
    if session:
        request_func = session.request

    resp = request_func(
        method,
        url,
        **kwargs)  # jumps into site-packages/requests/api.py, which eventually sends the POST

    self.http_log_resp(resp)

    if resp.text:
        # TODO(dtroyer): verify the note below in a requests context
        # NOTE(alaski): Because force_exceptions_to_status_code=True
        # httplib2 returns a connection refused event as a 400 response.
        # To determine if it is a bad request or refused connection we need
        # to check the body.  httplib2 tests check for 'Connection refused'
        # or 'actively refused' in the body, so that's what we'll do.
        if resp.status_code == 400:
            if ('Connection refused' in resp.text or
                    'actively refused' in resp.text):
                raise exceptions.ConnectionRefused(resp.text)
        try:
            body = json.loads(resp.text)
        except ValueError:
            # Non-JSON response text is treated as "no body".
            body = None
    else:
        body = None

    if resp.status_code >= 400:
        raise exceptions.from_response(resp, body, url, method)

    return resp, body
请求发出后,OpenStack 中的wsgi服务会接收并处理该请求,到这里dashboard基本已经完成了它在启动云主机过程中的角色。

这里我看的也不是很懂,望懂得同学指点下。

我感觉是一些nova.po文件会调用Controller(wsgi.Controller)的create函数

5. 在nova/api/openstack/compute/servers.py中-> Controller.create()

@wsgi.response(202)
@wsgi.serializers(xml=FullServerTemplate)
@wsgi.deserializers(xml=CreateDeserializer)
def create(self, req, body):
    """Creates a new server for a given user."""
    # Intermediate code omitted by the article (too long to quote); the
    # local names used below (flavor_id, context, name, ...) come from it.
    try:
        _get_inst_type = flavors.get_flavor_by_flavor_id
        inst_type = _get_inst_type(flavor_id, ctxt=context,
                                   read_deleted="no")
        # Jump: this calls the compute API (self.compute_api.create).
        (instances, resv_id) = self.compute_api.create(context,
                        inst_type,
                        image_uuid,
                        display_name=name,
                        display_description=name,
                        key_name=key_name,
                        metadata=server_dict.get('metadata', {}),
                        access_ip_v4=access_ip_v4,
                        access_ip_v6=access_ip_v6,
                        injected_files=injected_files,
                        admin_password=password,
                        min_count=min_count,
                        max_count=max_count,
                        requested_networks=requested_networks,
                        security_group=sg_names,
                        user_data=user_data,
                        availability_zone=availability_zone,
                        config_drive=config_drive,
                        block_device_mapping=block_device_mapping,
                        auto_disk_config=auto_disk_config,
                        scheduler_hints=scheduler_hints,
                        legacy_bdm=legacy_bdm)
    # NOTE: the excerpt stops here, inside the try block; the matching
    # except clauses and the rest of the method are not quoted.


6. 跳转至
nova/compute/api.py -> API.create() 从英文注释中可以看到,这是在为调度准备instance (instance information)

@hooks.add_hook("create_instance")
def create(self, context, instance_type,
           image_href, kernel_id=None, ramdisk_id=None,
           min_count=None, max_count=None,
           display_name=None, display_description=None,
           key_name=None, key_data=None, security_group=None,
           availability_zone=None, user_data=None, metadata=None,
           injected_files=None, admin_password=None,
           block_device_mapping=None, access_ip_v4=None,
           access_ip_v6=None, requested_networks=None, config_drive=None,
           auto_disk_config=None, scheduler_hints=None, legacy_bdm=True, **kwargs):
    """Provision instances, sending instance information to the
    scheduler.  The scheduler will determine where the instance(s)
    go and will handle creating the DB entries.

    Returns a tuple of (instances, reservation_id)
    """

    # Policy checks run before anything else.
    self._check_create_policies(context, availability_zone,
            requested_networks, block_device_mapping)

    # Extra port check only when networks were requested, more than one
    # instance is wanted, and utils.is_neutron() is true.
    if requested_networks and max_count > 1 and utils.is_neutron():
        self._check_multiple_instances_neutron_ports(requested_networks)
    # Jump: everything is handed on to API._create_instance.
    return self._create_instance(
                           context, instance_type,
                           image_href, kernel_id, ramdisk_id,
                           min_count, max_count,
                           display_name, display_description,
                           key_name, key_data, security_group,
                           availability_zone, user_data, metadata,
                           injected_files, admin_password,
                           access_ip_v4, access_ip_v6,
                           requested_networks, config_drive,
                           block_device_mapping, auto_disk_config,
                           scheduler_hints=scheduler_hints,
                           legacy_bdm=legacy_bdm)
跳转至-> API._create_instance

def _create_instance(self, context, instance_type,
           image_href, kernel_id, ramdisk_id,
           min_count, max_count,
           display_name, display_description,
           key_name, key_data, security_groups,
           availability_zone, user_data, metadata,
           injected_files, admin_password,
           access_ip_v4, access_ip_v6,
           requested_networks, config_drive,
           block_device_mapping, auto_disk_config,
           reservation_id=None, scheduler_hints=None,
           legacy_bdm=True):
    """Verify all the input parameters regardless of the provisioning
    strategy being performed and schedule the instance(s) for
    creation.
    """

    # Intermediate code omitted by the article (mainly parameter
    # validation); instances, boot_meta and filter_properties come from it.
    # compute_task_api is the conductor API: jump to nova/conductor/api.py.
    self.compute_task_api.build_instances(context,
            instances=instances, image=boot_meta,
            filter_properties=filter_properties,
            admin_password=admin_password,
            injected_files=injected_files,
            requested_networks=requested_networks,
            security_groups=security_groups,
            block_device_mapping=block_device_mapping,
            legacy_bdm=False)

    return (instances, reservation_id)


7. 跳转至nova/conductor/api.py -> ComputeTaskAPI.build_instances

def build_instances(self, context, instances, image, filter_properties,
        admin_password, injected_files, requested_networks,
        security_groups, block_device_mapping, legacy_bdm=True):
    # Pass-through: forwards every argument unchanged to the conductor
    # RPC API. Next jump: nova/conductor/rpcapi.py.
    self.conductor_compute_rpcapi.build_instances(context,
            instances=instances, image=image,
            filter_properties=filter_properties,
            admin_password=admin_password, injected_files=injected_files,
            requested_networks=requested_networks,
            security_groups=security_groups,
            block_device_mapping=block_device_mapping,
            legacy_bdm=legacy_bdm)


8 跳转至 nova/conductor/rpcapi.py ->ComputeTaskAPI.build_instances

def build_instances(self, context, instances, image, filter_properties,
        admin_password, injected_files, requested_networks,
        security_groups, block_device_mapping, legacy_bdm=True):
    # Converts the image to primitives, prepares a client context pinned
    # to version '1.5' and casts 'build_instances' with the arguments.
    image_p = jsonutils.to_primitive(image)
    cctxt = self.client.prepare(version='1.5')
    # Next jump: site-packages/oslo/messaging/rpc/client.py
    cctxt.cast(context, 'build_instances',
               instances=instances, image=image_p,
               filter_properties=filter_properties,
               admin_password=admin_password,
               injected_files=injected_files,
               requested_networks=requested_networks,
               security_groups=security_groups,
               block_device_mapping=block_device_mapping,
               legacy_bdm=legacy_bdm)


9. 将跳转至
site-packages/oslo/messaging/rpc/client.py -> RPCClient.cast() -> _CallContext.cast()
_CallContext.cast()是个神奇的函数啊(请注意此处,后面几步会重复调用这里,现在的method是 'build_instances'),我感觉它是把消息发给对应API的Manager类了

def cast(self, ctxt, method, **kwargs):
    """Invoke a method and return immediately. See RPCClient.cast()."""
    # Builds the message, serialises the context, optionally checks the
    # message version against the cap, then hands it to the transport.
    msg = self._make_message(ctxt, method, kwargs)
    ctxt = self.serializer.serialize_context(ctxt)

    if self.version_cap:
        self._check_version_cap(msg.get('version'))
    try:
        # This is where the message is sent (the article's author notes
        # they had not fully traced what happens beyond this call).
        self.transport._send(self.target, ctxt, msg)
    except driver_base.TransportDriverError as ex:
        # Transport driver failures are re-raised as ClientSendError.
        raise ClientSendError(self.target, ex)
10. _CallContext.cast() 我感觉他是对函数的验证,但只是猜测,求指点。这里的调用过程可能不对啊,请注意。

_CallContext.cast()->site-packages/oslo/messaging/transport.py
Transport._send() -> site-packages/oslo/messaging/_drivers/impl_zmq.py ZmqSocket.send()
然后感觉是socket传递,但是我不是很懂socket,接下来就不知道是怎么回事儿了。
11. 上一步说了,感觉_CallContext.cast()是个神奇的函数,上面函数调用的method为'build_instances', 接下来经过各种调用,会到达
site-packages/nova/conductor/manager.py -> ComputeTaskManager.build_instances
各种API类和其对应的Manager类的关系我还不是很懂,还要后续再去看看,大家有了解的麻烦评论告知下,谢谢~

def build_instances(self, context, instances, image, filter_properties,
        admin_password, injected_files, requested_networks,
        security_groups, block_device_mapping, legacy_bdm=True):
    # Builds a request spec from the image and instances, adds the block
    # device mapping and security groups to it, and asks the scheduler
    # to run the instance(s).
    request_spec = scheduler_utils.build_request_spec(context, image,
                                                      instances)
    # NOTE(alaski): For compatibility until a new scheduler method is used.
    request_spec.update({'block_device_mapping': block_device_mapping,
                         'security_group': security_groups})
    # scheduler_rpcapi is the SchedulerAPI from
    # site-packages/nova/scheduler/rpcapi.py; this calls its run_instance().

    self.scheduler_rpcapi.run_instance(context, request_spec=request_spec,
            admin_password=admin_password, injected_files=injected_files,
            requested_networks=requested_networks, is_first_time=True,
            filter_properties=filter_properties,
            legacy_bdm_in_spec=legacy_bdm)


12跳转至 site-packages/nova/scheduler/rpcapi.py -> SchedulerAPI.run_instance()

def run_instance(self, ctxt, request_spec, admin_password,
        injected_files, requested_networks, is_first_time,
        filter_properties, legacy_bdm_in_spec=True):
    # Packs the arguments into a dict and casts 'run_instance' over RPC.
    msg_kwargs = {'request_spec': request_spec,
                  'admin_password': admin_password,
                  'injected_files': injected_files,
                  'requested_networks': requested_networks,
                  'is_first_time': is_first_time,
                  'filter_properties': filter_properties,
                  'legacy_bdm_in_spec': legacy_bdm_in_spec}
    # Same prepare()/cast() pattern as in step 8 of the article; only the
    # method name changes, to 'run_instance'.
    cctxt = self.client.prepare()
    cctxt.cast(ctxt, 'run_instance', **msg_kwargs)


13 所以接下来的各种跳转为第9、10、11步, 然后会调用 site-packages/nova/scheduler/manager.py -> SchedulerManager.run_instance()
,即接下来进入调度,为虚拟机寻找合适的计算节点

def run_instance(self, context, request_spec, admin_password,
        injected_files, requested_networks, is_first_time,
        filter_properties, legacy_bdm_in_spec=True):
    """Tries to call schedule_run_instance on the driver.
    Sets instance vm_state to ERROR on exceptions
    """
    instance_uuids = request_spec['instance_uuids']
    with compute_utils.EventReporter(context, conductor_api.LocalAPI(),
                                     'schedule', *instance_uuids):
        try:
            # Scheduling proper starts here. `driver` is the scheduler;
            # per the article the default one lives in
            # site-packages/nova/scheduler/filter_scheduler.py.
            return self.driver.schedule_run_instance(context,
                    request_spec, admin_password, injected_files,
                    requested_networks, is_first_time, filter_properties,
                    legacy_bdm_in_spec)

        except exception.NoValidHost as ex:
            # don't re-raise
            self._set_vm_state_and_notify('run_instance',
                                          {'vm_state': vm_states.ERROR,
                                          'task_state': None},
                                          context, ex, request_spec)
        except Exception as ex:
            # Any other failure also marks the VM as ERROR, but is then
            # re-raised by save_and_reraise_exception().
            with excutils.save_and_reraise_exception():
                self._set_vm_state_and_notify('run_instance',
                                              {'vm_state': vm_states.ERROR,
                                              'task_state': None},
                                              context, ex, request_spec)


关于driver调度器,推荐看http://krystism.is-programmer.com/posts/64410.html
http://krystism.is-programmer.com/posts/64486.html

这两篇文章,我看过之后也是受益匪浅,谢谢博主呢~
14 接下来,会跳转至site-packages/nova/scheduler/filter_scheduler.py->
FilterScheduler.schedule_run_instance()

def schedule_run_instance(self, context, request_spec,
                          admin_password, injected_files,
                          requested_networks, is_first_time,
                          filter_properties, legacy_bdm_in_spec):
    """This method is called from nova.compute.api to provision
    an instance.  We first create a build plan (a list of WeightedHosts)
    and then provision.

    Returns a list of the instances created.
    """
    # Intermediate code omitted by the article: that is where a host is
    # searched for the VM (instance_uuids, weighed_hosts and payload come
    # from the omitted part).
    for num, instance_uuid in enumerate(instance_uuids):
        request_spec['instance_properties']['launch_index'] = num

        try:
            try:
                # Take the next host from the front of the list; an empty
                # list means no valid host is left.
                weighed_host = weighed_hosts.pop(0)
                LOG.info(_("Choosing host %(weighed_host)s "
                            "for instance %(instance_uuid)s"),
                          {'weighed_host': weighed_host,
                           'instance_uuid': instance_uuid})
            except IndexError:
                raise exception.NoValidHost(reason="")
            # Next jump stays inside FilterScheduler.
            self._provision_resource(context, weighed_host,
                                     request_spec,
                                     filter_properties,
                                     requested_networks,
                                     injected_files, admin_password,
                                     is_first_time,
                                     instance_uuid=instance_uuid,
                                     legacy_bdm_in_spec=legacy_bdm_in_spec)
        except Exception as ex:
            # NOTE(vish): we don't reraise the exception here to make sure
            #             that all instances in the request get set to
            #             error properly
            driver.handle_schedule_error(context, ex, instance_uuid,
                                         request_spec)
        # scrub retry host list in case we're scheduling multiple
        # instances:
        retry = filter_properties.get('retry', {})
        retry['hosts'] = []

    self.notifier.info(context, 'scheduler.run_instance.end', payload)


15.
FilterScheduler.schedule_run_instance() ->
FilterScheduler._provision_resource()

def _provision_resource(self, context, weighed_host, request_spec,
        filter_properties, requested_networks, injected_files,
        admin_password, is_first_time, instance_uuid=None,
        legacy_bdm_in_spec=True):
    """Create the requested resource in this Zone."""
    # NOTE(vish): add our current instance back into the request spec
    request_spec['instance_uuids'] = [instance_uuid]
    payload = dict(request_spec=request_spec,
                   weighted_host=weighed_host.to_dict(),
                   instance_id=instance_uuid)
    self.notifier.info(context,
                       'scheduler.run_instance.scheduled', payload)

    # Update the metadata if necessary
    scheduler_hints = filter_properties.get('scheduler_hints') or {}
    try:
        updated_instance = driver.instance_update_db(context,
                                                     instance_uuid)
    except exception.InstanceNotFound:
        # The instance vanished while scheduling: log it and do not call
        # the compute node.
        LOG.warning(_("Instance disappeared during scheduling"),
                    context=context, instance_uuid=instance_uuid)

    else:
        scheduler_utils.populate_filter_properties(filter_properties,
                weighed_host.obj)
        # Next jump: site-packages/nova/compute/rpcapi.py ->
        # ComputeAPI.run_instance()
        self.compute_rpcapi.run_instance(context,
                instance=updated_instance,
                host=weighed_host.obj.host,
                request_spec=request_spec,
                filter_properties=filter_properties,
                requested_networks=requested_networks,
                injected_files=injected_files,
                admin_password=admin_password, is_first_time=is_first_time,
                node=weighed_host.obj.nodename,
                legacy_bdm_in_spec=legacy_bdm_in_spec)
16. 调用 site-packages/nova/compute/rpcapi.py
-> ComputeAPI.run_instance()

def run_instance(self, ctxt, instance, host, request_spec,
                 filter_properties, requested_networks,
                 injected_files, admin_password,
                 is_first_time, node=None, legacy_bdm_in_spec=True):
    # Converts the instance to primitives and casts 'run_instance' to the
    # chosen host with the negotiated RPC version.
    # NOTE(russellb) Havana compat
    version = self._get_compat_version('3.0', '2.37')
    instance_p = jsonutils.to_primitive(instance)
    msg_kwargs = {'instance': instance_p, 'request_spec': request_spec,
                  'filter_properties': filter_properties,
                  'requested_networks': requested_networks,
                  'injected_files': injected_files,
                  'admin_password': admin_password,
                  'is_first_time': is_first_time, 'node': node,
                  'legacy_bdm_in_spec': legacy_bdm_in_spec}
    # Same call pattern as step 12 of the article, but ComputeManager
    # lives on the compute node (the VM's host). Per the article, the
    # message goes onto the message queue and the target compute node
    # reads it there and carries on with provisioning the VM.
    cctxt = self.client.prepare(server=host, version=version)
    cctxt.cast(ctxt, 'run_instance', **msg_kwargs)
至此控制节点的所有操作基本完成,接下来将转至计算节点。

====================================================================================
17. site-packages/nova/compute/manager.py ComputeManager.run_instance()

@wrap_exception()
@reverts_task_state
@wrap_instance_event
@wrap_instance_fault
def run_instance(self, context, instance, request_spec,
                 filter_properties, requested_networks,
                 injected_files, admin_password,
                 is_first_time, node, legacy_bdm_in_spec):
    # Entry point on the compute node: normalises filter_properties and
    # runs self._run_instance() inside a lock keyed on the instance uuid.

    if filter_properties is None:
        filter_properties = {}

    @utils.synchronized(instance['uuid'])
    def do_run_instance():
        self._run_instance(context, request_spec,
                filter_properties, requested_networks, injected_files,
                admin_password, is_first_time, node, instance,
                legacy_bdm_in_spec)
    do_run_instance()  # invoke the synchronized helper defined above


18 ComputeManager.run_instance() -> ComputeManager._run_instance() -> ComputeManager._build_instance()
-> ComputeManager._spawn() 这就是web端启动云主机后显示的(spawning,孵化)。此函数结束后,云主机就新建完成,即返回消息给控制节点。

=============================================================================
至此即为启动云主机(虚拟机)的代码调用过程。

这个过程我自己已经查看了两三遍,整个流程应该是没有问题,但细节说明可能会有不准确,由于刚开始接触Openstack,一切还处于摸索阶段,如有遗漏,请大家评论告知会及时更新。
此文章仅为OpenStack启动云主机函数调用流程,并未附以详细的函数注释,也许会在后面更加熟悉OpenStack后再添加,望能和大家共同进步,求大牛指点啊,看代码看的快吐血啦


白了个白喽
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: