首页 > 技术文章 > inspect流程

gushiren 2018-08-21 17:49 原文

当node节点的provision state为manageable时,可执行inspect
ironic node-set-provision-state <node_uuid> manage
ironic node-set-provision-state <node_uuid> inspect

inspect阶段

执行inspect后,ironic客户端会发送PUT请求到 /v1/nodes/{node_ident}/states/provision , ironic-api 收到这个请求后会根据 body 的 target 字段做处理:

class NodeStatesController(rest.RestController):
    """Abridged excerpt of the REST controller handling node state requests.

    Method bodies shown as dots are elided in this excerpt.
    """
    # Custom actions exposed by this controller; each one accepts PUT only.
    _custom_actions = {
    'power': ['PUT'],
    'provision': ['PUT'],
    'raid': ['PUT'],
}
    def get(self, node_ident):
        .......
    def raid(self, node_ident, target_raid_config):
        ......
    def power(self, node_ident, target, timeout=None):
        ......
    def provision(self, node_ident, target, configdrive=None,
              clean_steps=None, rescue_password=None):
        # Dispatch on the requested provision target.
        # NOTE(review): rpc_node and topic are used below but are not defined
        # in this excerpt; presumably they are resolved in elided code.
        if target in (ir_states.ACTIVE, ir_states.REBUILD):# check the target state
            # rebuild is True only when the target is REBUILD.
            rebuild = (target == ir_states.REBUILD)
            pecan.request.rpcapi.do_node_deploy(context=pecan.request.context,
                                        node_id=rpc_node.uuid,
                                        rebuild=rebuild,
                                        configdrive=configdrive,
                                        topic=topic)
        elif target == ir_states.VERBS['inspect']:# call the inspect_hardware RPC method
            pecan.request.rpcapi.inspect_hardware(pecan.request.context, rpc_node.uuid, topic=topic)
ironic/api/controllers/v1/node.py

然后 ironic 通过发送 HTTP 请求调用 ironic-inspector。inspect 的具体实现与 driver 有关,位于 driver.inspect.inspect_hardware 中

def inspect_hardware(self, task):
    """Start hardware inspection for the task's node without blocking.

    Hands ``_start_inspection`` to ``eventlet.spawn_n`` together with the
    node UUID and the task context, then reports the inspecting state.

    :param task: object providing ``node.uuid`` and ``context``.
    :returns: ``states.INSPECTING``.
    """
    node_uuid = task.node.uuid
    request_context = task.context
    eventlet.spawn_n(_start_inspection, node_uuid, request_context)
    return states.INSPECTING
  
def _start_inspection(node_uuid, context):
    # Ask the inspector client to introspect the given node.
    # NOTE(review): the except clause of this try is not shown in the excerpt.
    try:
        _get_client(context).introspect(node_uuid)# build the client via _get_client (per the article it authenticates to inspector through keystone - confirm) and start introspection
def _get_client(context):
    # Build and return an inspector ClientV1.
    # NOTE(review): session and inspector_url come from the elided part.
    .......
    return client.ClientV1(api_version=INSPECTOR_API_VERSION,
                       session=session,
                       inspector_url=inspector_url)# instantiate the inspector client
ironic/drivers/modules/inspector.py
class ClientV1(http.BaseClient):
    # Version 1 of the client (abridged excerpt).
    .......
    # Create a client; api_version falls back to DEFAULT_API_VERSION.
    def __init__(self, **kwargs):
        kwargs.setdefault('api_version', DEFAULT_API_VERSION)
        super(ClientV1, self).__init__(**kwargs)
        self.rules = RulesAPI(self.request)# introspection rules API, built from self.request
  
    # Start introspection for a node.
    def introspect(self, uuid):
        # Reject a non-string uuid before issuing the request.
        if not isinstance(uuid, six.string_types):
            raise TypeError(
             _("Expected string for uuid argument, got %r") % uuid)
 
        # POST to /introspection/<uuid>.
        self.request('post', '/introspection/%s' % uuid)
ironic_inspector_client/v1.py

inspector处理阶段

@api('/v1/introspection/<node_id>',
     rule="introspection:{}",
     verb_to_rule_map={'GET': 'status', 'POST': 'start'},
     methods=['GET', 'POST'])
def api_introspection(node_id):
    """Start introspection (POST) or report its status (any other verb).

    :param node_id: node identifier taken from the URL.
    :returns: ``('', 202)`` for POST; otherwise the JSON-encoded result of
        ``generate_introspection_status`` for the cached node.
    """
    if flask.request.method != 'POST':
        # Status query: look the node up in the cache and describe it.
        cached_node = node_cache.get_node(node_id)
        status = generate_introspection_status(cached_node)
        return flask.json.jsonify(status)

    # Start request: forward the caller's X-Auth-Token header and reply
    # with 202 to signal that the request was accepted.
    auth_token = flask.request.headers.get('X-Auth-Token')
    introspect.introspect(node_id, token=auth_token)
    return '', 202
ironic-inspector/ironic_inspector/main.py
def introspect(node_id, token=None):
    # Register the start of introspection in the node cache, then run the
    # actual work in the background through the shared executor.
    # NOTE(review): node, bmc_address and ironic are not defined in this
    # excerpt; presumably they are set up in code the article omitted.
    node_info = node_cache.start_introspection(node.uuid,# per the article: updates IPMI info and adds bmc_address to the attributes table - confirm
                                           bmc_address=bmc_address,
                                           ironic=ironic)
    utils.executor().submit(_background_introspect, node_info, ironic)
  
def _background_introspect(node_info, ironic):
    # Background entry point (abridged): take the node lock, then delegate.
    ......
    ......
    node_info.acquire_lock()
    _background_introspect_locked(node_info, ironic)# call _background_introspect_locked to do the actual host discovery
  
def _background_introspect_locked(node_info, ironic):
    # Abridged: set a one-time PXE boot device and reboot the node.
    ......
    try:
        ironic.node.set_boot_device(node_info.uuid, 'pxe',
                                persistent=False)
    except Exception as exc:
        # Best effort: a failure to set the boot device is only logged.
        LOG.warning('Failed to set boot device to PXE: %s',
                exc, node_info=node_info)
    try:
        ironic.node.set_power_state(node_info.uuid, 'reboot')
    except Exception as exc:
        # A failed reboot is fatal for this introspection attempt.
        raise utils.Error(_('Failed to power on the node, check it\'s '
                        'power management configuration: %s'),
                      exc, node_info=node_info)
ironic_inspector/introspect.py

ipa阶段

class IronicPythonAgent(base.ExecuteCommandMixin):
    # Abridged excerpt of the agent; only the start of run() is shown.
    ......
    ......
    def run(self):
        # Record the start time and load the hardware managers.
        self.started_at = _time()
        hardware.load_managers()
        # Optionally wait before proceeding, when a positive delay is set.
        if self.hardware_initialization_delay > 0:
            LOG.info('Waiting %d seconds before proceeding',
                self.hardware_initialization_delay)
            time.sleep(self.hardware_initialization_delay)
        if not self.standalone:
            uuid = None
            if cfg.CONF.inspection_callback_url:
                uuid = inspector.inspect()# inspection_callback_url is configured, so run inspector.inspect()
ironic_python_agent/agent.py
def inspect():
    """Run the configured collectors and send the result to the inspector.

    Returns ``resp.get('uuid')`` on success. Returns None when no
    ``inspection_callback_url`` is configured or when ``call_inspector``
    returns None. (Abridged excerpt; dots mark elided code.)
    """
    ....
    if not CONF.inspection_callback_url:
        LOG.info('Inspection is disabled, skipping')
        return
    # Split the comma-separated collector list, dropping empty entries.
    collector_names = [x.strip() for x in CONF.inspection_collectors.split(',')
                   if x.strip()]
    LOG.info('inspection is enabled with collectors %s', collector_names)
    failures = utils.AccumulatedFailures(exc_class=errors.InspectionError)
    data = {}
  
    try:
        # Per the article: names map to the collect_* functions in this file;
        # the default is "default", i.e. collect_default.
        ext_mgr = extension_manager(collector_names)
        collectors = [(ext.name, ext.plugin) for ext in ext_mgr]
    except Exception as exc:
        # Report the failure to the inspector, then re-raise the exception.
        with excutils.save_and_reraise_exception():
            failures.add(exc)
            call_inspector(data, failures)
 
    for name, collector in collectors:
        try:
            # Example: with the default configuration this is collect_default.
            collector(data, failures)
        except Exception as exc:
            # A failing collector is recorded; the remaining ones still run.
            failures.add('collector %s failed: %s', name, exc)
    # Send the collected data and the recorded failures to the inspector.
    resp = call_inspector(data, failures)
    failures.raise_if_needed()
    if resp is None:
        LOG.info('stopping inspection, as inspector returned an error')
        return
    LOG.info('inspection finished successfully')
    return resp.get('uuid')
  
def collect_default(data, failures):
    """Store the hardware inventory under ``data['inventory']``.

    The inventory is obtained by dispatching ``list_hardware_info`` to the
    hardware managers.

    :param data: mutable mapping that receives the ``inventory`` key.
    :param failures: unused by this collector.
    """
    data['inventory'] = hardware.dispatch_to_managers('list_hardware_info')
ironic_python_agent/inspector.py

可以看到,ipa 除了 collect_default,还提供了 collect_logs、collect_extra_hardware、collect_pci_devices_info 三个函数,分别用于收集系统日志、收集 benchmark、收集 pci 设备信息。
在 ironic_python_agent/hardware.py 中可以看到 collect_default 收集了哪些信息

class HardwareManager(object):
        ......
        .....
    def list_hardware_info(self):
        # Assemble one dict describing the machine, one key per category.
        hardware_info = {}
        hardware_info['interfaces'] = self.list_network_interfaces()# network interfaces
        hardware_info['cpu'] = self.get_cpus()# CPU
        hardware_info['disks'] = self.list_block_devices()# disks
        hardware_info['memory'] = self.get_memory()# memory
        hardware_info['bmc_address'] = self.get_bmc_address()# BMC address
        hardware_info['system_vendor'] = self.get_system_vendor_info()# system vendor information
        hardware_info['boot'] = self.get_boot_info()# boot information
        return hardware_info# return the hardware information
ironic_python_agent/hardware.py

 ipa收集BM信息并将其发送给ipa-inspection-callback-url

@api('/v1/continue', rule="introspection:continue", is_public_api=True,
     methods=['POST'])
def api_continue():
    """Accept introspection data posted by the ramdisk and process it.

    :returns: the JSON-encoded result of ``process.process``.
    :raises utils.Error: if the request body is not a JSON object.
    """
    payload = flask.request.get_json(force=True)
    if not isinstance(payload, dict):
        raise utils.Error(_('Invalid data: expected a JSON object, got %s') %
                          payload.__class__.__name__)

    # Build the copy used for logging, masking every excluded key.
    sanitized = {}
    for key, value in payload.items():
        sanitized[key] = ('<hidden>' if key in _LOGGING_EXCLUDED_KEYS
                          else value)
    LOG.debug("Received data from the ramdisk: %s", sanitized,
              data=payload)

    # Hand the introspection data from the ramdisk over for processing.
    result = process.process(payload)
    return flask.jsonify(result)
ironic_inspector/main.py
def process(introspection_data):
    # Abridged excerpt; dots mark elided code and both try blocks are
    # shown without their handlers.
    # Keep an untouched copy, since the hooks below receive the original.
    unprocessed_data = copy.deepcopy(introspection_data)
    failures = []
    _run_pre_hooks(introspection_data, failures)
    node_info = _find_node_info(introspection_data, failures)# per the article: looks up the inspector node by ipmi_address and MACs - confirm
    if node_info:
        node_info.acquire_lock()
    ......
    utils.executor().submit(_store_unprocessed_data, node_info,
                        unprocessed_data)# store the unprocessed data in the background via the executor
    try:
        node = node_info.node()# get the node from node_info
    .....
    try:
        result = _process_node(node_info, node, introspection_data) # hand over to _process_node (per the article it re-checks the node information)
  
def _process_node(node_info, node, introspection_data):
    """Process introspection data for a located node and schedule the finish.

    :param node_info: inspector-side node record.
    :param node: node object; its ``uuid`` is returned to the caller.
    :param introspection_data: data received from the ramdisk.
    :returns: dict with the node's ``uuid``.
    """
    # Check the node's provision state before touching anything else.
    ir_utils.check_provision_state(node)
    _run_post_hooks(node_info, introspection_data)
    # NOTE(review): per the article nothing is stored when the inspector's
    # store_data option is "none" - confirm against _store_data.
    _store_data(node_info, introspection_data)

    ironic_client = ir_utils.get_client()
    pxe_filter.driver().sync(ironic_client)

    # Drop cached node information so it is reloaded on next access,
    # then apply the introspection rules to the node.
    node_info.invalidate_cache()
    rules.apply(node_info, introspection_data)

    node_uuid = node.uuid
    # Finish in the background; _finish also handles powering the node off.
    utils.executor().submit(
        _finish, node_info, ironic_client, introspection_data,
        power_off=CONF.processing.power_off)
    return {'uuid': node_uuid}
  
def _finish(node_info, ironic, introspection_data, power_off=True):
    """Finish introspection of a node, optionally powering it off first.

    :param node_info: inspector-side node record; its ``uuid``, ``node()``
        and ``finished()`` are used here.
    :param ironic: ironic client used to change the node's power state.
    :param introspection_data: introspection data, passed to the log and
        error calls as ``data``.
    :param power_off: when true, power the node off before marking the
        introspection as finished.
    :raises utils.Error: if powering off fails and the node is not in the
        ``enroll`` provision state.
    """
    if power_off:
        LOG.debug('Forcing power off of node %s', node_info.uuid)
        # The power-off attempt belongs under the power_off guard, so it is
        # skipped entirely when the caller passes power_off=False.
        try:
            ironic.node.set_power_state(node_info.uuid, 'off')
        except Exception as exc:
            if node_info.node().provision_state == 'enroll':
                # Power-off failures are tolerated for nodes in 'enroll'.
                LOG.info("Failed to power off the node in "
                         "'enroll' state, ignoring; error was "
                         "%s", exc, node_info=node_info,
                         data=introspection_data)
            else:
                msg = (_('Failed to power off node %(node)s, check '
                         'its power management configuration: '
                         '%(exc)s') % {'node': node_info.uuid, 'exc':
                                       exc})
                raise utils.Error(msg, node_info=node_info,
                                  data=introspection_data)
        LOG.info('Node powered-off', node_info=node_info,
                 data=introspection_data)

    # Mark the introspection as finished; this is part of the function,
    # not module-level code.
    node_info.finished(istate.Events.finish)
    LOG.info('Introspection finished successfully',
             node_info=node_info, data=introspection_data)
ironic_inspector/process.py

 

推荐阅读