这篇教程C++ unlock_slurmctld函数代码示例写得很实用,希望能帮到您。
本文整理汇总了C++中unlock_slurmctld函数的典型用法代码示例。如果您正苦于以下问题:C++ unlock_slurmctld函数的具体用法?C++ unlock_slurmctld怎么用?C++ unlock_slurmctld使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。 在下文中一共展示了unlock_slurmctld函数的24个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。 示例1: info/* The timeslicer thread */static void *_timeslicer_thread(void *arg){ /* Write locks on job and read lock on nodes */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK }; ListIterator part_iterator; struct gs_part *p_ptr; if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) info("gang: starting timeslicer loop"); while (!thread_shutdown) { _slice_sleep(); if (thread_shutdown) break; lock_slurmctld(job_write_lock); pthread_mutex_lock(&data_mutex); list_sort(gs_part_list, _sort_partitions); /* scan each partition... */ if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) info("gang: _timeslicer_thread: scanning partitions"); part_iterator = list_iterator_create(gs_part_list); while ((p_ptr = (struct gs_part *) list_next(part_iterator))) { if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) { info("gang: _timeslicer_thread: part %s: " "run %u total %u", p_ptr->part_name, p_ptr->jobs_active, p_ptr->num_jobs); } if (p_ptr->jobs_active < (p_ptr->num_jobs + p_ptr->num_shadows)) { _cycle_job_list(p_ptr); } } list_iterator_destroy(part_iterator); pthread_mutex_unlock(&data_mutex); /* Preempt jobs that were formerly only suspended */ _preempt_job_dequeue(); /* MUST BE OUTSIDE data_mutex lock */ unlock_slurmctld(job_write_lock); } timeslicer_thread_id = (pthread_t) 0; pthread_exit((void *) 0); return NULL;}
开发者ID:corburn,项目名称:slurm,代码行数:47,
示例2: suspend_job/* RET 0 on success, -1 on failure */extern int suspend_job(char *cmd_ptr, int *err_code, char **err_msg){ char *arg_ptr, *tmp_char; int slurm_rc; suspend_msg_t msg; uint32_t jobid; static char reply_msg[128]; /* Locks: write job and node info */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; arg_ptr = strstr(cmd_ptr, "ARG="); if (arg_ptr == NULL) { *err_code = -300; *err_msg = "SUSPENDJOB lacks ARG"; error("wiki: SUSPENDJOB lacks ARG"); return -1; } jobid = strtoul(arg_ptr+4, &tmp_char, 10); if ((tmp_char[0] != '/0') && (!isspace(tmp_char[0]))) { *err_code = -300; *err_msg = "Invalid ARG value"; error("wiki: SUSPENDJOB has invalid jobid"); return -1; } msg.job_id = jobid; msg.op = SUSPEND_JOB; lock_slurmctld(job_write_lock); slurm_rc = job_suspend(&msg, 0, -1, false, (uint16_t)NO_VAL); unlock_slurmctld(job_write_lock); if (slurm_rc != SLURM_SUCCESS) { *err_code = -700; *err_msg = slurm_strerror(slurm_rc); error("wiki: Failed to suspend job %u (%m)", jobid); return -1; } snprintf(reply_msg, sizeof(reply_msg), "job %u suspended successfully", jobid); *err_msg = reply_msg; return 0;}
开发者ID:Q-Leap-Networks,项目名称:qlustar-slurm,代码行数:44,
示例3: job_notify_wiki/* Notify a job via arbitrary message: * CMD=NOTIFYJOB ARG=<jobid> MSG=<string> * RET 0 on success, -1 on failure */extern int job_notify_wiki(char *cmd_ptr, int *err_code, char **err_msg){ char *arg_ptr, *msg_ptr; int slurm_rc; uint32_t jobid; static char reply_msg[128]; /* Locks: read job */ slurmctld_lock_t job_read_lock = { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; arg_ptr = strstr(cmd_ptr, "ARG="); if (arg_ptr == NULL) { *err_code = -300; *err_msg = "NOTIFYJOB lacks ARG="; error("wiki: NOTIFYJOB lacks ARG="); return -1; } arg_ptr += 4; jobid = atol(arg_ptr); msg_ptr = strstr(cmd_ptr, "MSG="); if (msg_ptr == NULL) { *err_code = -300; *err_msg = "NOTIFYJOB lacks MSG="; error("wiki: NOTIFYJOB lacks MSG="); return -1; } msg_ptr += 4; lock_slurmctld(job_read_lock); slurm_rc = _job_notify(jobid, msg_ptr); unlock_slurmctld(job_read_lock); if (slurm_rc != SLURM_SUCCESS) { *err_code = -700; *err_msg = slurm_strerror(slurm_rc); error("wiki: Failed to notify job %u (%m)", jobid); return -1; } snprintf(reply_msg, sizeof(reply_msg), "job %u notified successfully", jobid); *err_msg = reply_msg; return 0;}
开发者ID:Q-Leap-Networks,项目名称:qlustar-slurm,代码行数:46,
示例4: while/* Perform periodic background activities */static void *_bb_agent(void *args){ /* Locks: write job */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; while (!bb_state.term_flag) { bb_sleep(&bb_state, AGENT_INTERVAL); if (bb_state.term_flag) break; lock_slurmctld(job_write_lock); pthread_mutex_lock(&bb_state.bb_mutex); _load_state(0); _timeout_bb_rec(); pthread_mutex_unlock(&bb_state.bb_mutex); unlock_slurmctld(job_write_lock); } return NULL;}
开发者ID:rohgarg,项目名称:slurm,代码行数:20,
示例5: bg_requeue_job/* block_state_mutex must be unlocked before calling this. */extern void bg_requeue_job(uint32_t job_id, bool wait_for_start){ int rc; slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; /* Wait for the slurmd to begin the batch script, slurm_fail_job() is a no-op if issued prior to the script initiation do clean up just incase the fail job isn't ran. */ if (wait_for_start) sleep(2); lock_slurmctld(job_write_lock); if ((rc = job_requeue(0, job_id, -1, (uint16_t)NO_VAL, false))) { error("Couldn't requeue job %u, failing it: %s", job_id, slurm_strerror(rc)); job_fail(job_id); } unlock_slurmctld(job_write_lock);}
开发者ID:alepharchives,项目名称:slurm,代码行数:21,
示例6: _load_config/* backfill_agent - detached thread periodically attempts to backfill jobs */extern void *backfill_agent(void *args){ struct timeval tv1, tv2; char tv_str[20]; time_t now; double wait_time; static time_t last_backfill_time = 0; /* Read config and partitions; Write jobs and nodes */ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; _load_config(); last_backfill_time = time(NULL); while (!stop_backfill) { _my_sleep(backfill_interval); if (stop_backfill) break; if (config_flag) { config_flag = false; _load_config(); } now = time(NULL); wait_time = difftime(now, last_backfill_time); if ((wait_time < backfill_interval) || _job_is_completing() || _many_pending_rpcs() || !avail_front_end() || !_more_work(last_backfill_time)) continue; gettimeofday(&tv1, NULL); lock_slurmctld(all_locks); while (_attempt_backfill()) ; last_backfill_time = time(NULL); unlock_slurmctld(all_locks); gettimeofday(&tv2, NULL); _diff_tv_str(&tv1, &tv2, tv_str, 20); if (debug_flags & DEBUG_FLAG_BACKFILL) info("backfill: completed, %s", tv_str); } return NULL;}
开发者ID:donaghy1,项目名称:slurm,代码行数:41,
示例7: _yield_locks/* Return non-zero to break the backfill loop if change in job, node or * partition state or the backfill scheduler needs to be stopped. */static int _yield_locks(void){ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; time_t job_update, node_update, part_update; job_update = last_job_update; node_update = last_node_update; part_update = last_part_update; unlock_slurmctld(all_locks); _my_sleep(backfill_interval); lock_slurmctld(all_locks); if ((last_job_update == job_update) && (last_node_update == node_update) && (last_part_update == part_update) && (! stop_backfill) && (! config_flag)) return 0; else return 1;}
开发者ID:donaghy1,项目名称:slurm,代码行数:24,
示例8: spawn_msg_thread/*****************************************************************************/ * spawn message hander thread/*****************************************************************************/extern int spawn_msg_thread(void){ pthread_attr_t thread_attr_msg; slurm_ctl_conf_t *conf; /* Locks: Read configurationn */ slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; lock_slurmctld(config_read_lock); conf = slurm_conf_lock(); sched_port = conf->dynalloc_port; slurm_conf_unlock(); unlock_slurmctld(config_read_lock); if (sched_port == 0) { error("DynAllocPort == 0, not spawning communication thread"); return SLURM_ERROR; } slurm_mutex_lock( &thread_flag_mutex ); if (thread_running) { error("dynalloc thread already running, not starting another"); slurm_mutex_unlock(&thread_flag_mutex); return SLURM_ERROR; } slurm_attr_init(&thread_attr_msg); if (pthread_create(&msg_thread_id, &thread_attr_msg, _msg_thread, NULL)) fatal("pthread_create %m"); else info("dynalloc: msg thread create successful!"); slurm_attr_destroy(&thread_attr_msg); thread_running = true; slurm_mutex_unlock(&thread_flag_mutex); return SLURM_SUCCESS;}
开发者ID:diorsman,项目名称:slurm,代码行数:42,
示例9: usleep/* We can not invoke update_job_dependency() until the new job record has * been created, hence this sleeping thread modifies the dependent job * later. */static void *_dep_agent(void *args){ struct job_record *job_ptr = (struct job_record *) args; slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK}; char *end_ptr = NULL, *tok; int cnt = 0; usleep(100000); lock_slurmctld(job_write_lock); if (job_ptr && job_ptr->details && (job_ptr->magic == JOB_MAGIC) && job_ptr->comment && strstr(job_ptr->comment, "on:")) { char *new_depend = job_ptr->details->dependency; job_ptr->details->dependency = NULL; update_job_dependency(job_ptr, new_depend); xfree(new_depend); tok = strstr(job_ptr->comment, "on:"); cnt = strtol(tok + 3, &end_ptr, 10); } if (cnt == 0) set_job_prio(job_ptr); unlock_slurmctld(job_write_lock); return NULL;}
开发者ID:BYUHPC,项目名称:slurm,代码行数:27,
示例10: run_backup/* run_backup - this is the backup controller, it should run in standby * mode, assuming control when the primary controller stops responding */void run_backup(slurm_trigger_callbacks_t *callbacks){ int i; uint32_t trigger_type; time_t last_ping = 0; pthread_attr_t thread_attr_sig, thread_attr_rpc; slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; slurmctld_lock_t config_write_lock = { WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK }; info("slurmctld running in background mode"); takeover = false; last_controller_response = time(NULL); /* default: don't resume if shutdown */ slurmctld_config.resume_backup = false; if (xsignal_block(backup_sigarray) < 0) error("Unable to block signals"); /* * create attached thread to process RPCs */ slurm_attr_init(&thread_attr_rpc); while (pthread_create(&slurmctld_config.thread_id_rpc, &thread_attr_rpc, _background_rpc_mgr, NULL)) { error("pthread_create error %m"); sleep(1); } slurm_attr_destroy(&thread_attr_rpc); /* * create attached thread for signal handling */ slurm_attr_init(&thread_attr_sig); while (pthread_create(&slurmctld_config.thread_id_sig, &thread_attr_sig, _background_signal_hand, NULL)) { error("pthread_create %m"); sleep(1); } slurm_attr_destroy(&thread_attr_sig); trigger_type = TRIGGER_TYPE_BU_CTLD_RES_OP; _trigger_slurmctld_event(trigger_type); for (i = 0; ((i < 5) && (slurmctld_config.shutdown_time == 0)); i++) { sleep(1); /* Give the primary slurmctld set-up time */ } /* repeatedly ping ControlMachine */ while (slurmctld_config.shutdown_time == 0) { sleep(1); /* Lock of slurmctld_conf below not important */ if (slurmctld_conf.slurmctld_timeout && (takeover == false) && (difftime(time(NULL), last_ping) < (slurmctld_conf.slurmctld_timeout / 3))) continue; last_ping = time(NULL); if (_ping_controller() == 0) last_controller_response = time(NULL); else if (takeover) { /* in takeover mode, take control as soon as */ /* primary no longer respond */ break; } else { 
uint32_t timeout; lock_slurmctld(config_read_lock); timeout = slurmctld_conf.slurmctld_timeout; unlock_slurmctld(config_read_lock); if (difftime(time(NULL), last_controller_response) > timeout) { break; } } } if (slurmctld_config.shutdown_time != 0) { /* Since pidfile is created as user root (its owner is * changed to SlurmUser) SlurmUser may not be able to * remove it, so this is not necessarily an error. * No longer need slurmctld_conf lock after above join. */ if (unlink(slurmctld_conf.slurmctld_pidfile) < 0) verbose("Unable to remove pidfile '%s': %m", slurmctld_conf.slurmctld_pidfile); info("BackupController terminating"); pthread_join(slurmctld_config.thread_id_sig, NULL); log_fini(); if (dump_core) abort(); else exit(0); } lock_slurmctld(config_read_lock);//.........这里部分代码省略.........
开发者ID:bingzhang,项目名称:slurm,代码行数:101,
示例11: slurm_mutex_lock/* Checkpoint processing pthread * Never returns, but is cancelled on plugin termiantion */static void *_ckpt_agent_thr(void *arg){ struct ckpt_req *req = (struct ckpt_req *)arg; int rc; /* Locks: write job */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; struct job_record *job_ptr; struct step_record *step_ptr; struct check_job_info *check_ptr; /* only perform ckpt operation of ONE JOB */ slurm_mutex_lock(&ckpt_agent_mutex); while (ckpt_agent_jobid && ckpt_agent_jobid != req->job_id) { pthread_cond_wait(&ckpt_agent_cond, &ckpt_agent_mutex); } ckpt_agent_jobid = req->job_id; ckpt_agent_count ++; slurm_mutex_unlock(&ckpt_agent_mutex); debug3("checkpoint/blcr: sending checkpoint tasks request %u to %u.%u", req->op, req->job_id, req->step_id); rc = checkpoint_tasks(req->job_id, req->step_id, req->begin_time, req->image_dir, req->wait, req->nodelist); if (rc != SLURM_SUCCESS) { error("checkpoint/blcr: error on checkpoint request %u to " "%u.%u: %s", req->op, req->job_id, req->step_id, slurm_strerror(rc)); } if (req->op == CHECK_REQUEUE) _requeue_when_finished(req->job_id); lock_slurmctld(job_write_lock); job_ptr = find_job_record(req->job_id); if (!job_ptr) { error("_ckpt_agent_thr: job finished"); goto out; } if (req->step_id == SLURM_BATCH_SCRIPT) { /* batch job */ check_ptr = (struct check_job_info *)job_ptr->check_job; } else { step_ptr = find_step_record(job_ptr, req->step_id); if (! 
step_ptr) { error("_ckpt_agent_thr: step finished"); goto out; } check_ptr = (struct check_job_info *)step_ptr->check_job; } check_ptr->time_stamp = 0; check_ptr->error_code = rc; if (check_ptr->error_code != SLURM_SUCCESS) check_ptr->error_msg = xstrdup(slurm_strerror(rc)); out: unlock_slurmctld(job_write_lock); if (req->sig_done) { _send_sig(req->job_id, req->step_id, req->sig_done, req->nodelist); } _on_ckpt_complete(req->gid, req->uid, req->job_id, req->step_id, req->image_dir, rc); slurm_mutex_lock(&ckpt_agent_mutex); ckpt_agent_count --; if (ckpt_agent_count == 0) { ckpt_agent_jobid = 0; pthread_cond_broadcast(&ckpt_agent_cond); } slurm_mutex_unlock(&ckpt_agent_mutex); _ckpt_req_free(req); return NULL;}
开发者ID:Cray,项目名称:slurm,代码行数:77,
示例12: job_modify_wiki//.........这里部分代码省略......... if (feature_ptr) { feature_ptr[9] = ':'; feature_ptr += 10; null_term(feature_ptr); } if (gres_ptr) { gres_ptr[4] = ':'; gres_ptr += 5; null_term(gres_ptr); } if (host_ptr) { host_ptr[8] = ':'; host_ptr += 9; null_term(host_ptr); } if (name_ptr) { name_ptr[7] = ':'; name_ptr += 8; if (name_ptr[0] == '/"') { name_ptr++; for (i=0; ; i++) { if (name_ptr[i] == '/0') break; if (name_ptr[i] == '/"') { name_ptr[i] = '/0'; break; } } } else if (name_ptr[0] == '/'') { name_ptr++; for (i=0; ; i++) { if (name_ptr[i] == '/0') break; if (name_ptr[i] == '/'') { name_ptr[i] = '/0'; break; } } } else null_term(name_ptr); } if (start_ptr) { start_ptr[12] = ':'; start_ptr += 13; null_term(start_ptr); } if (nodes_ptr) { nodes_ptr[5] = ':'; nodes_ptr += 6; new_node_cnt = strtoul(nodes_ptr, NULL, 10); } if (part_ptr) { part_ptr[9] = ':'; part_ptr += 10; null_term(part_ptr); } if (time_ptr) { time_ptr[9] = ':'; time_ptr += 10; new_time_limit = strtoul(time_ptr, NULL, 10); } if (env_ptr) { env_ptr[12] = ':'; env_ptr += 13; null_term(env_ptr); } if (wckey_ptr) { wckey_ptr[5] = ':'; wckey_ptr += 6; null_term(wckey_ptr); } /* Look for any un-parsed "=" ignoring anything after VARIABLELIST * which is expected to contain "=" in its value*/ tmp_char = strchr(cmd_ptr, '='); if (tmp_char && (!env_ptr || (env_ptr > tmp_char))) { tmp_char[0] = '/0'; while (tmp_char[-1] && (!isspace(tmp_char[-1]))) tmp_char--; error("wiki: Invalid MODIFYJOB option %s", tmp_char); } lock_slurmctld(job_write_lock); slurm_rc = _job_modify(jobid, bank_ptr, depend_ptr, host_ptr, new_node_cnt, part_ptr, new_time_limit, name_ptr, start_ptr, feature_ptr, env_ptr, comment_ptr, gres_ptr, wckey_ptr); unlock_slurmctld(job_write_lock); if (slurm_rc != SLURM_SUCCESS) { *err_code = -700; *err_msg = slurm_strerror(slurm_rc); error("wiki: Failed to modify job %u (%m)", jobid); return -1; } snprintf(reply_msg, sizeof(reply_msg), "job %u modified successfully", jobid); *err_msg = 
reply_msg; return 0;}
开发者ID:Xarthisius,项目名称:slurm,代码行数:101,
示例13: job_modify_wiki/* Modify a job: * CMD=MODIFYJOB ARG=<jobid> PARTITION=<name> NODES=<number> * DEPEND=afterany:<jobid> TIMELIMT=<seconds> BANK=<name> * RET 0 on success, -1 on failure */extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg){ char *arg_ptr, *bank_ptr, *depend_ptr, *nodes_ptr; char *host_ptr, *part_ptr, *time_ptr, *tmp_char; int slurm_rc; uint32_t jobid, new_node_cnt = 0, new_time_limit = 0; static char reply_msg[128]; /* Locks: write job, read node and partition info */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK, NO_LOCK }; arg_ptr = strstr(cmd_ptr, "ARG="); if (arg_ptr == NULL) { *err_code = -300; *err_msg = "MODIFYJOB lacks ARG="; error("wiki: MODIFYJOB lacks ARG="); return -1; } /* Change all parsed "=" to ":" then search for remaining "=" * and report results as unrecognized options */ arg_ptr[3] = ':'; arg_ptr += 4; jobid = strtoul(arg_ptr, &tmp_char, 10); if ((tmp_char[0] != '/0') && (!isspace(tmp_char[0]))) { *err_code = -300; *err_msg = "Invalid ARG value"; error("wiki: MODIFYJOB has invalid jobid"); return -1; } bank_ptr = strstr(cmd_ptr, "BANK="); depend_ptr = strstr(cmd_ptr, "DEPEND="); host_ptr = strstr(cmd_ptr, "HOSTLIST="); nodes_ptr = strstr(cmd_ptr, "NODES="); part_ptr = strstr(cmd_ptr, "PARTITION="); time_ptr = strstr(cmd_ptr, "TIMELIMIT="); if (bank_ptr) { bank_ptr[4] = ':'; bank_ptr += 5; null_term(bank_ptr); } if (depend_ptr) { depend_ptr[6] = ':'; depend_ptr += 7; null_term(depend_ptr); } if (host_ptr) { host_ptr[8] = ':'; host_ptr += 9; null_term(bank_ptr); } if (nodes_ptr) { nodes_ptr[5] = ':'; nodes_ptr += 6; new_node_cnt = strtoul(nodes_ptr, NULL, 10); } if (part_ptr) { part_ptr[9] = ':'; part_ptr += 10; null_term(part_ptr); } if (time_ptr) { time_ptr[9] = ':'; time_ptr += 10; new_time_limit = strtoul(time_ptr, NULL, 10); } /* Look for any un-parsed "=" */ tmp_char = strchr(cmd_ptr, '='); if (tmp_char) { tmp_char[0] = '/0'; while (tmp_char[-1] && 
(!isspace(tmp_char[-1]))) tmp_char--; error("wiki: Invalid MODIFYJOB option %s", tmp_char); } lock_slurmctld(job_write_lock); slurm_rc = _job_modify(jobid, bank_ptr, depend_ptr, host_ptr, new_node_cnt, part_ptr, new_time_limit); unlock_slurmctld(job_write_lock); if (slurm_rc != SLURM_SUCCESS) { *err_code = -700; *err_msg = slurm_strerror(slurm_rc); error("wiki: Failed to modify job %u (%m)", jobid); return -1; } snprintf(reply_msg, sizeof(reply_msg), "job %u modified successfully", jobid); *err_msg = reply_msg; return 0;}
开发者ID:rathamahata,项目名称:slurm,代码行数:95,
示例14: debug/* * init_power_save - Initialize the power save module. Started as a * pthread. Terminates automatically at slurmctld shutdown time. * Input and output are unused. */static void *_init_power_save(void *arg){ /* Locks: Read nodes */ slurmctld_lock_t node_read_lock = { NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; /* Locks: Write nodes */ slurmctld_lock_t node_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; time_t now, boot_time = 0, last_power_scan = 0; if (power_save_config && !power_save_enabled) { debug("power_save mode not enabled"); return NULL; } suspend_node_bitmap = bit_alloc(node_record_count); resume_node_bitmap = bit_alloc(node_record_count); while (slurmctld_config.shutdown_time == 0) { sleep(1); if (_reap_procs() < 2) { debug("power_save programs getting backlogged"); continue; } if ((last_config != slurmctld_conf.last_update) && (_init_power_config())) { info("power_save mode has been disabled due to " "configuration changes"); goto fini; } now = time(NULL); if (boot_time == 0) boot_time = now; /* Only run every 60 seconds or after a node state change, * whichever happens first */ if ((last_node_update >= last_power_scan) || (now >= (last_power_scan + 60))) { lock_slurmctld(node_write_lock); _do_power_work(now); unlock_slurmctld(node_write_lock); last_power_scan = now; } if (slurmd_timeout && (now > (boot_time + (slurmd_timeout / 2)))) { lock_slurmctld(node_read_lock); _re_wake(); unlock_slurmctld(node_read_lock); /* prevent additional executions */ boot_time += (365 * 24 * 60 * 60); slurmd_timeout = 0; } }fini: _clear_power_config(); FREE_NULL_BITMAP(suspend_node_bitmap); FREE_NULL_BITMAP(resume_node_bitmap); _shutdown_power(); slurm_mutex_lock(&power_mutex); power_save_enabled = false; pthread_cond_signal(&power_cond); slurm_mutex_unlock(&power_mutex); pthread_exit(NULL); return NULL;}
开发者ID:edsw,项目名称:slurm,代码行数:74,
示例15: _start_job//.........这里部分代码省略......... job_ptr->details->req_node_layout = (uint16_t *) xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t)); bsize = bit_size(new_bitmap); for (i = 0, ll = -1; i < bsize; i++) { if (!bit_test(new_bitmap, i)) continue; ll++; node_name = node_record_table_ptr[i].name; node_name_len = strlen(node_name); if (node_name_len == 0) continue; node_cur = tasklist; while (*node_cur) { if ((node_idx = strstr(node_cur, node_name))) { if ((node_idx[node_name_len] == ',') || (node_idx[node_name_len] == '/0')) { job_ptr->details-> req_node_layout[ll] += cpus_per_task; } node_cur = strchr(node_idx, ','); if (node_cur) continue; } break; } } } /* save and update job state to start now */ save_req_nodes = job_ptr->details->req_nodes; job_ptr->details->req_nodes = new_node_list; save_req_bitmap = job_ptr->details->req_node_bitmap; job_ptr->details->req_node_bitmap = new_bitmap; old_task_cnt = job_ptr->details->min_cpus; job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt); job_ptr->priority = 100000000; fini: unlock_slurmctld(job_write_lock); if (rc) return rc; /* No errors so far */ (void) schedule(INFINITE); /* provides own locking */ /* Check to insure the job was actually started */ lock_slurmctld(job_write_lock); if (job_ptr->job_id != jobid) job_ptr = find_job_record(jobid); if (job_ptr && (job_ptr->job_id == jobid) && (!IS_JOB_RUNNING(job_ptr))) { uint16_t wait_reason = 0; char *wait_string; if (IS_JOB_FAILED(job_ptr)) wait_string = "Invalid request, job aborted"; else { wait_reason = job_ptr->state_reason; if (wait_reason == WAIT_HELD) { /* some job is completing, slurmctld did * not even try to schedule this job */ wait_reason = WAIT_RESOURCES; } wait_string = job_reason_string(wait_reason); job_ptr->state_reason = WAIT_HELD; xfree(job_ptr->state_desc); } *err_code = -910 - wait_reason; snprintf(tmp_msg, sizeof(tmp_msg), "Could not start job %u(%s): %s", jobid, new_node_list, wait_string); *err_msg = tmp_msg; error("wiki: %s", 
tmp_msg); /* restore some of job state */ job_ptr->priority = 0; job_ptr->details->min_cpus = old_task_cnt; rc = -1; } if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) { /* Restore required node list in case job requeued */ xfree(job_ptr->details->req_nodes); job_ptr->details->req_nodes = save_req_nodes; FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap); job_ptr->details->req_node_bitmap = save_req_bitmap; FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap); xfree(job_ptr->details->req_node_layout); } else { error("wiki: start_job(%u) job missing", jobid); xfree(save_req_nodes); FREE_NULL_BITMAP(save_req_bitmap); } unlock_slurmctld(job_write_lock); schedule_node_save(); /* provides own locking */ schedule_job_save(); /* provides own locking */ return rc;}
开发者ID:VURM,项目名称:slurm,代码行数:101,
示例16: dump_all_front_end_state/* dump_all_front_end_state - save the state of all front_end nodes to file */extern int dump_all_front_end_state(void){#ifdef HAVE_FRONT_END /* Save high-water mark to avoid buffer growth with copies */ static int high_buffer_size = (1024 * 1024); int error_code = 0, i, log_fd; char *old_file, *new_file, *reg_file; front_end_record_t *front_end_ptr; /* Locks: Read config and node */ slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; Buf buffer = init_buf(high_buffer_size); DEF_TIMERS; START_TIMER; /* write header: version, time */ packstr(FRONT_END_STATE_VERSION, buffer); pack_time(time(NULL), buffer); /* write node records to buffer */ lock_slurmctld (node_read_lock); for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt; i++, front_end_ptr++) { xassert(front_end_ptr->magic == FRONT_END_MAGIC); _dump_front_end_state(front_end_ptr, buffer); } old_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (old_file, "/front_end_state.old"); reg_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (reg_file, "/front_end_state"); new_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (new_file, "/front_end_state.new"); unlock_slurmctld (node_read_lock); /* write the buffer to file */ lock_state_files(); log_fd = creat (new_file, 0600); if (log_fd < 0) { error ("Can't save state, error creating file %s %m", new_file); error_code = errno; } else { int pos = 0, nwrite = get_buf_offset(buffer), amount, rc; char *data = (char *)get_buf_data(buffer); high_buffer_size = MAX(nwrite, high_buffer_size); while (nwrite > 0) { amount = write(log_fd, &data[pos], nwrite); if ((amount < 0) && (errno != EINTR)) { error("Error writing file %s, %m", new_file); error_code = errno; break; } nwrite -= amount; pos += amount; } rc = fsync_and_close(log_fd, "front_end"); if (rc && !error_code) error_code = rc; } if (error_code) (void) unlink (new_file); else { /* file shuffle */ (void) unlink 
(old_file); if (link(reg_file, old_file)) debug4("unable to create link for %s -> %s: %m", reg_file, old_file); (void) unlink (reg_file); if (link(new_file, reg_file)) debug4("unable to create link for %s -> %s: %m", new_file, reg_file); (void) unlink (new_file); } xfree (old_file); xfree (reg_file); xfree (new_file); unlock_state_files (); free_buf (buffer); END_TIMER2("dump_all_front_end_state"); return error_code;#else return SLURM_SUCCESS;#endif}
开发者ID:Cray,项目名称:slurm,代码行数:87,
示例17: pthread_setcancelstate/* _background_rpc_mgr - Read and process incoming RPCs to the background * controller (that's us) */static void *_background_rpc_mgr(void *no_data){ slurm_fd_t newsockfd; slurm_fd_t sockfd; slurm_addr_t cli_addr; slurm_msg_t *msg = NULL; int error_code; char* node_addr = NULL; /* Read configuration only */ slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; int sigarray[] = {SIGUSR1, 0}; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid()); /* initialize port for RPCs */ lock_slurmctld(config_read_lock); /* set node_addr to bind to (NULL means any) */ if ((strcmp(slurmctld_conf.backup_controller, slurmctld_conf.backup_addr) != 0)) { node_addr = slurmctld_conf.backup_addr ; } if ((sockfd = slurm_init_msg_engine_addrname_port(node_addr, slurmctld_conf. slurmctld_port)) == SLURM_SOCKET_ERROR) fatal("slurm_init_msg_engine_addrname_port error %m"); unlock_slurmctld(config_read_lock); /* Prepare to catch SIGUSR1 to interrupt accept(). * This signal is generated by the slurmctld signal * handler thread upon receipt of SIGABRT, SIGINT, * or SIGTERM. That thread does all processing of * all signals. 
*/ xsignal(SIGUSR1, _sig_handler); xsignal_unblock(sigarray); /* * Process incoming RPCs indefinitely */ while (slurmctld_config.shutdown_time == 0) { /* accept needed for stream implementation * is a no-op in message implementation that just passes * sockfd to newsockfd */ if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr)) == SLURM_SOCKET_ERROR) { if (errno != EINTR) error("slurm_accept_msg_conn: %m"); continue; } msg = xmalloc(sizeof(slurm_msg_t)); slurm_msg_t_init(msg); if (slurm_receive_msg(newsockfd, msg, 0) != 0) error("slurm_receive_msg: %m"); error_code = _background_process_msg(msg); if ((error_code == SLURM_SUCCESS) && (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE) && (slurmctld_config.shutdown_time == 0)) slurmctld_config.shutdown_time = time(NULL); slurm_free_msg_data(msg->msg_type, msg->data); slurm_free_msg(msg); slurm_close(newsockfd); /* close new socket */ } debug3("_background_rpc_mgr shutting down"); slurm_close(sockfd); /* close the main socket */ pthread_exit((void *) 0); return NULL;}
开发者ID:bingzhang,项目名称:slurm,代码行数:81,
示例18: _start_agent//.........这里部分代码省略......... bg_record->bg_block_id, RM_MODIFY_Options, conn_type)) != SLURM_SUCCESS) error("bridge_set_data(RM_MODIFY_Options): %s", bg_err_str(rc)); }#endif if ((rc = bridge_block_modify(bg_record->bg_block_id, RM_MODIFY_MloaderImg, bg_record->mloaderimage)) != SLURM_SUCCESS) error("bridge_block_modify(RM_MODIFY_MloaderImg): %s", bg_err_str(rc));#endif bg_record->modifying = 0; }no_reboot: if (bg_record->state == BG_BLOCK_FREE) { if ((rc = bridge_block_boot(bg_record)) != SLURM_SUCCESS) { char reason[200]; bg_record->boot_state = 0; bg_record->boot_count = 0; if (rc == BG_ERROR_INVALID_STATE) snprintf(reason, sizeof(reason), "Block %s is in an incompatible " "state. This usually means " "hardware is allocated " "by another block (maybe outside " "of SLURM).", bg_record->bg_block_id); else snprintf(reason, sizeof(reason), "Couldn't boot block %s: %s", bg_record->bg_block_id, bg_err_str(rc)); slurm_mutex_unlock(&block_state_mutex); requeue_and_error(bg_record, reason); return; } } else if (bg_record->state == BG_BLOCK_BOOTING) {#ifdef HAVE_BG_FILES bg_record->boot_state = 1;#else if (!block_ptr_exist_in_list(bg_lists->booted, bg_record)) list_push(bg_lists->booted, bg_record); bg_record->state = BG_BLOCK_INITED; last_bg_update = time(NULL);#endif } if ((bg_record->job_running <= NO_JOB_RUNNING) && !find_job_in_bg_record(bg_record, req_job_id)) { slurm_mutex_unlock(&block_state_mutex); debug("job %u finished during the start of the boot " "(everything is ok)", req_job_id); return; } /* Don't reset boot_count, it will be reset when state changes, and needs to outlast a job allocation. 
*/ /* bg_record->boot_count = 0; */ if (bg_record->state == BG_BLOCK_INITED) { debug("block %s is already ready.", bg_record->bg_block_id); /* Just in case reset the boot flags */ bg_record->boot_state = 0; bg_record->boot_count = 0; set_user_rc = bridge_block_sync_users(bg_record); block_inited = 1; } slurm_mutex_unlock(&block_state_mutex); /* This lock needs to happen after the block_state_mutex to avoid deadlock. */ if (block_inited && bg_action_ptr->job_ptr) { slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; lock_slurmctld(job_write_lock); bg_action_ptr->job_ptr->job_state &= (~JOB_CONFIGURING); last_job_update = time(NULL); unlock_slurmctld(job_write_lock); } if (set_user_rc == SLURM_ERROR) { sleep(2); /* wait for the slurmd to begin the batch script, slurm_fail_job() is a no-op if issued prior to the script initiation do clean up just in case the fail job isn't ran */ (void) slurm_fail_job(req_job_id, JOB_BOOT_FAIL); }}
开发者ID:HPCNow,项目名称:slurm,代码行数:101,
示例19: _notify_slurmctld_nodesstatic void _notify_slurmctld_nodes(agent_info_t *agent_ptr, int no_resp_cnt, int retry_cnt){ ListIterator itr = NULL; ret_data_info_t *ret_data_info = NULL; state_t state; int is_ret_list = 1; /* Locks: Read config, write job, write node */ slurmctld_lock_t node_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; thd_t *thread_ptr = agent_ptr->thread_struct; int i; /* Notify slurmctld of non-responding nodes */ if (no_resp_cnt) { /* Update node table data for non-responding nodes */ lock_slurmctld(node_write_lock); if (agent_ptr->msg_type == REQUEST_BATCH_JOB_LAUNCH) { /* Requeue the request */ batch_job_launch_msg_t *launch_msg_ptr = *agent_ptr->msg_args_pptr; uint32_t job_id = launch_msg_ptr->job_id; job_complete(job_id, 0, true, false, 0); } unlock_slurmctld(node_write_lock); } if (retry_cnt && agent_ptr->retry) _queue_agent_retry(agent_ptr, retry_cnt); /* Update last_response on responding nodes */ lock_slurmctld(node_write_lock); for (i = 0; i < agent_ptr->thread_count; i++) { char *down_msg, *node_names; if (!thread_ptr[i].ret_list) { state = thread_ptr[i].state; is_ret_list = 0; goto switch_on_state; } is_ret_list = 1; itr = list_iterator_create(thread_ptr[i].ret_list); while ((ret_data_info = list_next(itr))) { state = ret_data_info->err; switch_on_state: switch(state) { case DSH_NO_RESP: if (!is_ret_list) { node_not_resp(thread_ptr[i].nodelist, thread_ptr[i]. 
start_time); } else { node_not_resp(ret_data_info->node_name, thread_ptr[i].start_time); } break; case DSH_FAILED: if (is_ret_list) node_names = ret_data_info->node_name; else node_names = thread_ptr[i].nodelist;#ifdef HAVE_FRONT_END down_msg = "";#else set_node_down(node_names, "Prolog/Epilog failure"); down_msg = ", set to state DOWN";#endif error("Prolog/Epilog failure on nodes %s%s", node_names, down_msg); break; case DSH_DONE: if (!is_ret_list) node_did_resp(thread_ptr[i].nodelist); else node_did_resp(ret_data_info->node_name); break; default: if (!is_ret_list) { error("unknown state returned for %s", thread_ptr[i].nodelist); } else { error("unknown state returned for %s", ret_data_info->node_name); } break; } if (!is_ret_list) goto finished; } list_iterator_destroy(itr);finished: ; } unlock_slurmctld(node_write_lock); if (run_scheduler) { run_scheduler = false; /* below functions all have their own locking */ if (schedule(0)) { schedule_job_save(); schedule_node_save(); }//.........这里部分代码省略.........
开发者ID:alepharchives,项目名称:slurm,代码行数:101,
示例20: xassert//.........这里部分代码省略......... goto cleanup; } } //info("sending %u to %s", msg_type, thread_ptr->nodelist); if (slurm_send_only_node_msg(&msg) == SLURM_SUCCESS) { thread_state = DSH_DONE; } else { if (!srun_agent) _comm_err(thread_ptr->nodelist, msg_type); } goto cleanup; } //info("got %d messages back", list_count(ret_list)); found = 0; itr = list_iterator_create(ret_list); while ((ret_data_info = list_next(itr)) != NULL) { rc = slurm_get_return_code(ret_data_info->type, ret_data_info->data); /* SPECIAL CASE: Mark node as IDLE if job already complete */ if (is_kill_msg && (rc == ESLURMD_KILL_JOB_ALREADY_COMPLETE)) { kill_job_msg_t *kill_job; kill_job = (kill_job_msg_t *) task_ptr->msg_args_ptr; rc = SLURM_SUCCESS; lock_slurmctld(job_write_lock); if (job_epilog_complete(kill_job->job_id, ret_data_info-> node_name, rc)) run_scheduler = true; unlock_slurmctld(job_write_lock); } /* SPECIAL CASE: Kill non-startable batch job, * Requeue the job on ESLURMD_PROLOG_FAILED */ if ((msg_type == REQUEST_BATCH_JOB_LAUNCH) && (rc != SLURM_SUCCESS) && (rc != ESLURMD_PROLOG_FAILED) && (ret_data_info->type != RESPONSE_FORWARD_FAILED)) { batch_job_launch_msg_t *launch_msg_ptr = task_ptr->msg_args_ptr; uint32_t job_id = launch_msg_ptr->job_id; info("Killing non-startable batch job %u: %s", job_id, slurm_strerror(rc)); thread_state = DSH_DONE; ret_data_info->err = thread_state; lock_slurmctld(job_write_lock); job_complete(job_id, 0, false, false, _wif_status()); unlock_slurmctld(job_write_lock); continue; } if (((msg_type == REQUEST_SIGNAL_TASKS) || (msg_type == REQUEST_TERMINATE_TASKS)) && (rc == ESRCH)) { /* process is already dead, not a real error */ rc = SLURM_SUCCESS; } switch (rc) { case SLURM_SUCCESS: /* debug("agent processed RPC to node %s", */ /* ret_data_info->node_name); */ thread_state = DSH_DONE; break;
开发者ID:alepharchives,项目名称:slurm,代码行数:67,
示例21: dump_all_part_state
/*
 * dump_all_part_state - save the state of all partitions to file
 *
 * Packs a version header, a timestamp and every partition record into a
 * buffer (holding a read lock on partition data), then writes the buffer
 * to <state_save_location>/part_state.new and atomically rotates it into
 * place via the usual new -> reg -> old link shuffle.
 *
 * RET 0 always; write errors are logged, error_code only controls whether
 *     the partially written .new file is discarded (historical behavior).
 */
int dump_all_part_state(void)
{
	/* Save high-water mark to avoid buffer growth with copies */
	static int high_buffer_size = BUF_SIZE;
	ListIterator part_iterator;
	struct part_record *part_ptr;
	int error_code = 0, log_fd;
	char *old_file, *new_file, *reg_file;
	/* Locks: Read partition */
	slurmctld_lock_t part_read_lock =
	    { READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };
	Buf buffer = init_buf(high_buffer_size);
	DEF_TIMERS;

	START_TIMER;
	/* write header: version string and time */
	packstr(PART_STATE_VERSION, buffer);
	pack_time(time(NULL), buffer);

	/* write partition records to buffer */
	lock_slurmctld(part_read_lock);
	part_iterator = list_iterator_create(part_list);
	while ((part_ptr = (struct part_record *) list_next(part_iterator))) {
		xassert (part_ptr->magic == PART_MAGIC);
		_dump_part_state(part_ptr, buffer);
	}
	list_iterator_destroy(part_iterator);

	old_file = xstrdup(slurmctld_conf.state_save_location);
	xstrcat(old_file, "/part_state.old");
	reg_file = xstrdup(slurmctld_conf.state_save_location);
	xstrcat(reg_file, "/part_state");
	new_file = xstrdup(slurmctld_conf.state_save_location);
	xstrcat(new_file, "/part_state.new");
	unlock_slurmctld(part_read_lock);

	/* write the buffer to file */
	lock_state_files();
	log_fd = creat(new_file, 0600);
	if (log_fd < 0) {
		error("Can't save state, error creating file %s, %m",
		      new_file);
		error_code = errno;
	} else {
		int pos = 0, nwrite = get_buf_offset(buffer), amount, rc;
		char *data = (char *) get_buf_data(buffer);
		high_buffer_size = MAX(nwrite, high_buffer_size);
		while (nwrite > 0) {
			amount = write(log_fd, &data[pos], nwrite);
			if (amount < 0) {
				/* BUGFIX: on EINTR the old code fell through
				 * and did nwrite -= -1 / pos += -1, growing
				 * the count and rewinding the offset. Retry
				 * the write without touching either. */
				if (errno == EINTR)
					continue;
				error("Error writing file %s, %m", new_file);
				error_code = errno;
				break;
			}
			nwrite -= amount;
			pos += amount;
		}
		rc = fsync_and_close(log_fd, "partition");
		if (rc && !error_code)
			error_code = rc;
	}
	if (error_code)
		(void) unlink(new_file);
	else {			/* file shuffle */
		(void) unlink(old_file);
		if (link(reg_file, old_file)) {
			debug4("unable to create link for %s -> %s: %m",
			       reg_file, old_file);
		}
		(void) unlink(reg_file);
		if (link(new_file, reg_file)) {
			debug4("unable to create link for %s -> %s: %m",
			       new_file, reg_file);
		}
		(void) unlink(new_file);
	}
	xfree(old_file);
	xfree(reg_file);
	xfree(new_file);
	unlock_state_files();

	free_buf(buffer);
	END_TIMER2("dump_all_part_state");
	return 0;
}
开发者ID:kwangiit,项目名称:SLURMPP,代码行数:87,
示例22: job_will_run2
/*
 * job_will_run2 - determine when and where a pending job can start,
 *	given an available node list and an optional set of jobs to preempt
 * cmd_ptr IN  - request of the form
 *	"... ARG=<jobid> [STARTTIME=<uts>] [PREEMPT=<id>[,<id>...]]
 *	 NODES=<node_list>"  (NODES parsing must stay last: we copy the
 *	 remainder of the string and truncate it at the first blank)
 * err_code OUT - 0 on success, -300/-700 style codes on failure
 * err_msg OUT  - "SC=0 ARG=<result>" on success (xmalloc'ed, caller owns),
 *	           or a static error string on failure
 * RET 0 on success, -1 on failure
 */
extern int job_will_run2(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *buf, *tmp_buf, *tmp_char;
	int preemptee_cnt = 0;
	uint32_t jobid, *preemptee = NULL, tmp_id;
	time_t start_time;
	char *avail_nodes = NULL;
	/* Locks: write job, read node and partition info */
	slurmctld_lock_t job_write_lock =
	    { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "JOBWILLRUN lacks ARG";
		error("wiki: JOBWILLRUN lacks ARG");
		return -1;
	}
	arg_ptr += 4;
	jobid = strtoul(arg_ptr, &tmp_char, 10);
	/* BUGFIX: the scraped original tested '/0' (a multi-char constant);
	 * the job id must end at a blank or the NUL terminator '\0' */
	if ((tmp_char[0] != ' ') && (tmp_char[0] != '\0')) {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: JOBWILLRUN has invalid ARG value");
		return -1;
	}

	arg_ptr = strstr(cmd_ptr, "STARTTIME=");
	if (arg_ptr) {
		arg_ptr += 10;
		start_time = strtoul(arg_ptr, &tmp_char, 10);
		if ((tmp_char[0] != ' ') && (tmp_char[0] != '\0')) {
			*err_code = -300;
			*err_msg = "Invalid STARTTIME value";
			error("wiki: JOBWILLRUN has invalid STARTTIME value");
			return -1;
		}
	} else {
		/* no explicit start time requested: test for "now" */
		start_time = time(NULL);
	}

	arg_ptr = strstr(cmd_ptr, "PREEMPT=");
	if (arg_ptr) {
		arg_ptr += 8;
		/* one uint32_t per character is a safe upper bound on the
		 * number of comma-separated job ids in the list */
		preemptee = xmalloc(sizeof(uint32_t) * strlen(arg_ptr));
		while (1) {
			tmp_id = strtoul(arg_ptr, &tmp_char, 10);
			if ((tmp_char[0] != ' ') && (tmp_char[0] != '\0') &&
			    (tmp_char[0] != ',')) {
				*err_code = -300;
				*err_msg = "Invalid PREEMPT value";
				error("wiki: JOBWILLRUN has invalid PREEMPT "
				      "value");
				xfree(preemptee);
				xfree(avail_nodes);
				return -1;
			}
			preemptee[preemptee_cnt++] = tmp_id;
			if (tmp_char[0] != ',')
				break;
			arg_ptr = tmp_char + 1;
		}
	}

	/* Keep this last, since we modify the input string */
	arg_ptr = strstr(cmd_ptr, "NODES=");
	if (arg_ptr) {
		arg_ptr += 6;
		avail_nodes = xstrdup(arg_ptr);
		arg_ptr = strchr(avail_nodes, ' ');
		if (arg_ptr)
			arg_ptr[0] = '\0';
	} else {
		*err_code = -300;
		*err_msg = "Missing NODES value";
		error("wiki: JOBWILLRUN lacks NODES value");
		xfree(preemptee);
		return -1;
	}

	lock_slurmctld(job_write_lock);
	buf = _will_run_test2(jobid, start_time, avail_nodes,
			      preemptee, preemptee_cnt,
			      err_code, err_msg);
	unlock_slurmctld(job_write_lock);
	xfree(preemptee);
	xfree(avail_nodes);
	if (!buf)
		return -1;

	tmp_buf = xmalloc(strlen(buf) + 32);
	sprintf(tmp_buf, "SC=0 ARG=%s", buf);
	xfree(buf);
	*err_code = 0;
	*err_msg = tmp_buf;
	return 0;
}
开发者ID:beninim,项目名称:slurm_simulator,代码行数:101,
示例23: _proc_msg
/*
 * _proc_msg - decrypt one nonstop RPC, validate its protocol version,
 *	dispatch it to the matching handler under the appropriate
 *	slurmctld locks, and send the response back on new_fd.
 */
static void _proc_msg(int new_fd, char *msg, slurm_addr_t cli_addr)
{
	/* Locks: Read job and node data */
	slurmctld_lock_t job_read_lock = {
		NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
	/* Locks: Write job */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
	/* Locks: Write job, write node, read partition */
	slurmctld_lock_t job_write_lock2 = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
	/* Locks: Write node data */
	slurmctld_lock_t node_write_lock = {
		NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK, READ_LOCK };
	char *cmd_ptr, *resp = NULL, *msg_decrypted = NULL;
	uid_t cmd_uid;
	uint32_t protocol_version = 0;

	if (!msg) {
		info("slurmctld/nonstop: NULL message received");
		/* NOTE: escaped quotes were mangled to '/' in the scrape;
		 * the response embeds a quoted error string */
		resp = xstrdup("Error:\"NULL message received\"");
		goto send_resp;
	}

	msg_decrypted = _decrypt(msg, &cmd_uid);
	if (!msg_decrypted) {
		info("slurmctld/nonstop: Message decrypt failure");
		resp = xstrdup("Error:\"Message decrypt failure\"");
		goto send_resp;
	}
	if (nonstop_debug > 0)
		info("slurmctld/nonstop: msg decrypted:%s", msg_decrypted);
	cmd_ptr = msg_decrypted;

	/* 123456789012345678901234567890 */
	if (xstrncmp(cmd_ptr, version_string, 13) == 0) {
		cmd_ptr = strchr(cmd_ptr + 13, ':');
		if (cmd_ptr) {
			cmd_ptr++;
			protocol_version = SLURM_PROTOCOL_VERSION;
		}
	}
	if (protocol_version == 0) {
		info("slurmctld/nonstop: Message version invalid");
		resp = xstrdup("Error:\"Message version invalid\"");
		goto send_resp;
	}

	if (xstrncmp(cmd_ptr, "CALLBACK:JOBID:", 15) == 0) {
		resp = register_callback(cmd_ptr, cmd_uid, cli_addr,
					 protocol_version);
	} else if (xstrncmp(cmd_ptr, "DRAIN:NODES:", 12) == 0) {
		lock_slurmctld(node_write_lock);
		resp = drain_nodes_user(cmd_ptr, cmd_uid, protocol_version);
		unlock_slurmctld(node_write_lock);
	/* BUGFIX: "DROP_NODE:JOBID:" is 16 characters; the original compared
	 * only 15, silently accepting a mismatched 16th character */
	} else if (xstrncmp(cmd_ptr, "DROP_NODE:JOBID:", 16) == 0) {
		lock_slurmctld(job_write_lock2);
		resp = drop_node(cmd_ptr, cmd_uid, protocol_version);
		unlock_slurmctld(job_write_lock2);
	} else if (xstrncmp(cmd_ptr, "GET_FAIL_NODES:JOBID:", 21) == 0) {
		lock_slurmctld(job_read_lock);
		resp = fail_nodes(cmd_ptr, cmd_uid, protocol_version);
		unlock_slurmctld(job_read_lock);
	} else if (xstrncmp(cmd_ptr, "REPLACE_NODE:JOBID:", 19) == 0) {
		lock_slurmctld(job_write_lock2);
		resp = replace_node(cmd_ptr, cmd_uid, protocol_version);
		unlock_slurmctld(job_write_lock2);
	} else if (xstrncmp(cmd_ptr, "SHOW_CONFIG", 11) == 0) {
		resp = show_config(cmd_ptr, cmd_uid, protocol_version);
	} else if (xstrncmp(cmd_ptr, "SHOW_JOB:JOBID:", 15) == 0) {
		resp = show_job(cmd_ptr, cmd_uid, protocol_version);
	} else if (xstrncmp(cmd_ptr, "TIME_INCR:JOBID:", 16) == 0) {
		lock_slurmctld(job_write_lock);
		resp = time_incr(cmd_ptr, cmd_uid, protocol_version);
		unlock_slurmctld(job_write_lock);
	} else {
		info("slurmctld/nonstop: Invalid command: %s", cmd_ptr);
		xstrfmtcat(resp, "%s ECMD", SLURM_VERSION_STRING);
	}

send_resp:
	if (nonstop_debug > 0)
		info("slurmctld/nonstop: msg send:%s", resp);
	_send_reply(new_fd, resp);
	xfree(resp);
	free(msg_decrypted);	/* free(NULL) is a no-op; guard removed */
	return;
}
开发者ID:artpol84,项目名称:slurm,代码行数:89,
示例24: slurm_conf_lock/*****************************************************************************/ * message hander thread/*****************************************************************************/static void *_msg_thread(void *no_data){ slurm_fd_t sock_fd = -1, new_fd; slurm_addr_t cli_addr; char *msg; slurm_ctl_conf_t *conf; int i; /* Locks: Write configuration, job, node, and partition */ slurmctld_lock_t config_write_lock = { WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK }; conf = slurm_conf_lock(); sched_port = conf->schedport; slurm_conf_unlock(); /* Wait until configuration is completely loaded */ lock_slurmctld(config_write_lock); unlock_slurmctld(config_write_lock); /* If SchedulerPort is already taken, keep trying to open it * once per minute. Slurmctld will continue to function * during this interval even if nothing can be scheduled. */ for (i=0; (!thread_shutdown); i++) { if (i > 0) sleep(60); sock_fd = slurm_init_msg_engine_port(sched_port); if (sock_fd != SLURM_SOCKET_ERROR) break; error("wiki: slurm_init_msg_engine_port %u %m", sched_port); error("wiki: Unable to communicate with Moab"); } /* Process incoming RPCs until told to shutdown */ while (!thread_shutdown) { if ((new_fd = slurm_accept_msg_conn(sock_fd, &cli_addr)) == SLURM_SOCKET_ERROR) { if (errno != EINTR) error("wiki: slurm_accept_msg_conn %m"); continue; } if (thread_shutdown) { close(new_fd); break; } /* It would be nice to create a pthread for each new * RPC, but that leaks memory on some systems when * done from a plugin. * FIXME: Maintain a pool of and reuse them. */ err_code = 0; err_msg = ""; msg = _recv_msg(new_fd); if (msg) { _proc_msg(new_fd, msg); xfree(msg); } slurm_close_accepted_conn(new_fd); } if (sock_fd > 0) (void) slurm_shutdown_msg_engine(sock_fd); pthread_exit((void *) 0); return NULL;}
开发者ID:IFCA,项目名称:slurm,代码行数:66,
注:本文中的unlock_slurmctld函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 C++ unlock_super函数代码示例 C++ unlock_rename函数代码示例 |