
Self-study tutorial: C++ unlock_slurmctld function code examples

51zixue.net · 2021-06-03 09:11:06 · C++

This article collects typical usage examples of the C++ function unlock_slurmctld as it appears in the SLURM workload manager source code. If you are wondering what unlock_slurmctld does, how to call it, or what real call sites look like, the curated examples below should help.

A total of 24 unlock_slurmctld code examples are shown below, sorted by popularity by default. Each snippet is reproduced from an open-source project, with the contributor and project noted after the code.
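All of the examples share one pattern: a slurmctld_lock_t initializer names the lock level (NO_LOCK, READ_LOCK, or WRITE_LOCK) for each class of data the slurmctld daemon protects, lock_slurmctld() acquires that whole set atomically, and unlock_slurmctld() releases it. The minimal sketch below only illustrates the shape of that pattern; the number and order of slurmctld_lock_t fields vary between SLURM versions (the examples below show both four-field and five-field initializers), so the four-field layout here is an assumption, and the helper name _touch_job_table is hypothetical.

/* Minimal sketch of the locking pattern used throughout these examples.
 * Assumes a four-field slurmctld_lock_t (config, job, node, partition),
 * as in the older snippets below; newer SLURM versions add more fields. */
static void _touch_job_table(void)
{
    /* Locks: write job, read node */
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };

    lock_slurmctld(job_write_lock);     /* acquire the declared set */
    /* ... read or modify slurmctld's job and node tables here ... */
    unlock_slurmctld(job_write_lock);   /* release the same set */
}

Note how every snippet pairs each lock_slurmctld() call with a matching unlock_slurmctld() on every code path, including error paths.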

Example 1: info

/* The timeslicer thread */
static void *_timeslicer_thread(void *arg)
{
    /* Write locks on job and read lock on nodes */
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
    ListIterator part_iterator;
    struct gs_part *p_ptr;

    if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
        info("gang: starting timeslicer loop");
    while (!thread_shutdown) {
        _slice_sleep();
        if (thread_shutdown)
            break;

        lock_slurmctld(job_write_lock);
        pthread_mutex_lock(&data_mutex);
        list_sort(gs_part_list, _sort_partitions);

        /* scan each partition... */
        if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
            info("gang: _timeslicer_thread: scanning partitions");
        part_iterator = list_iterator_create(gs_part_list);
        while ((p_ptr = (struct gs_part *) list_next(part_iterator))) {
            if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG) {
                info("gang: _timeslicer_thread: part %s: "
                     "run %u total %u", p_ptr->part_name,
                     p_ptr->jobs_active, p_ptr->num_jobs);
            }
            if (p_ptr->jobs_active <
                (p_ptr->num_jobs + p_ptr->num_shadows)) {
                _cycle_job_list(p_ptr);
            }
        }
        list_iterator_destroy(part_iterator);
        pthread_mutex_unlock(&data_mutex);

        /* Preempt jobs that were formerly only suspended */
        _preempt_job_dequeue();    /* MUST BE OUTSIDE data_mutex lock */
        unlock_slurmctld(job_write_lock);
    }

    timeslicer_thread_id = (pthread_t) 0;
    pthread_exit((void *) 0);
    return NULL;
}

Author: corburn, Project: slurm, Lines: 47


Example 2: suspend_job

/* RET 0 on success, -1 on failure */
extern int suspend_job(char *cmd_ptr, int *err_code, char **err_msg)
{
    char *arg_ptr, *tmp_char;
    int slurm_rc;
    suspend_msg_t msg;
    uint32_t jobid;
    static char reply_msg[128];
    /* Locks: write job and node info */
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };

    arg_ptr = strstr(cmd_ptr, "ARG=");
    if (arg_ptr == NULL) {
        *err_code = -300;
        *err_msg = "SUSPENDJOB lacks ARG";
        error("wiki: SUSPENDJOB lacks ARG");
        return -1;
    }
    jobid = strtoul(arg_ptr + 4, &tmp_char, 10);
    if ((tmp_char[0] != '\0') && (!isspace(tmp_char[0]))) {
        *err_code = -300;
        *err_msg = "Invalid ARG value";
        error("wiki: SUSPENDJOB has invalid jobid");
        return -1;
    }

    msg.job_id = jobid;
    msg.op = SUSPEND_JOB;
    lock_slurmctld(job_write_lock);
    slurm_rc = job_suspend(&msg, 0, -1, false, (uint16_t)NO_VAL);
    unlock_slurmctld(job_write_lock);
    if (slurm_rc != SLURM_SUCCESS) {
        *err_code = -700;
        *err_msg = slurm_strerror(slurm_rc);
        error("wiki: Failed to suspend job %u (%m)", jobid);
        return -1;
    }

    snprintf(reply_msg, sizeof(reply_msg),
             "job %u suspended successfully", jobid);
    *err_msg = reply_msg;
    return 0;
}

Author: Q-Leap-Networks, Project: qlustar-slurm, Lines: 44


Example 3: job_notify_wiki

/* Notify a job via arbitrary message:
 *    CMD=NOTIFYJOB ARG=<jobid> MSG=<string>
 * RET 0 on success, -1 on failure */
extern int job_notify_wiki(char *cmd_ptr, int *err_code, char **err_msg)
{
    char *arg_ptr, *msg_ptr;
    int slurm_rc;
    uint32_t jobid;
    static char reply_msg[128];
    /* Locks: read job */
    slurmctld_lock_t job_read_lock = {
        NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

    arg_ptr = strstr(cmd_ptr, "ARG=");
    if (arg_ptr == NULL) {
        *err_code = -300;
        *err_msg = "NOTIFYJOB lacks ARG=";
        error("wiki: NOTIFYJOB lacks ARG=");
        return -1;
    }
    arg_ptr += 4;
    jobid = atol(arg_ptr);

    msg_ptr = strstr(cmd_ptr, "MSG=");
    if (msg_ptr == NULL) {
        *err_code = -300;
        *err_msg = "NOTIFYJOB lacks MSG=";
        error("wiki: NOTIFYJOB lacks MSG=");
        return -1;
    }
    msg_ptr += 4;

    lock_slurmctld(job_read_lock);
    slurm_rc = _job_notify(jobid, msg_ptr);
    unlock_slurmctld(job_read_lock);
    if (slurm_rc != SLURM_SUCCESS) {
        *err_code = -700;
        *err_msg = slurm_strerror(slurm_rc);
        error("wiki: Failed to notify job %u (%m)", jobid);
        return -1;
    }

    snprintf(reply_msg, sizeof(reply_msg),
             "job %u notified successfully", jobid);
    *err_msg = reply_msg;
    return 0;
}

Author: Q-Leap-Networks, Project: qlustar-slurm, Lines: 46


Example 4: while

/* Perform periodic background activities */
static void *_bb_agent(void *args)
{
    /* Locks: write job */
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };

    while (!bb_state.term_flag) {
        bb_sleep(&bb_state, AGENT_INTERVAL);
        if (bb_state.term_flag)
            break;
        lock_slurmctld(job_write_lock);
        pthread_mutex_lock(&bb_state.bb_mutex);
        _load_state(0);
        _timeout_bb_rec();
        pthread_mutex_unlock(&bb_state.bb_mutex);
        unlock_slurmctld(job_write_lock);
    }
    return NULL;
}

Author: rohgarg, Project: slurm, Lines: 20


Example 5: bg_requeue_job

/* block_state_mutex must be unlocked before calling this. */
extern void bg_requeue_job(uint32_t job_id, bool wait_for_start)
{
    int rc;
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };

    /* Wait for the slurmd to begin the batch script; slurm_fail_job()
     * is a no-op if issued prior to the script initiation, so clean up
     * just in case the fail job isn't run. */
    if (wait_for_start)
        sleep(2);

    lock_slurmctld(job_write_lock);
    if ((rc = job_requeue(0, job_id, -1, (uint16_t)NO_VAL, false))) {
        error("Couldn't requeue job %u, failing it: %s",
              job_id, slurm_strerror(rc));
        job_fail(job_id);
    }
    unlock_slurmctld(job_write_lock);
}

Author: alepharchives, Project: slurm, Lines: 21


Example 6: _load_config

/* backfill_agent - detached thread periodically attempts to backfill jobs */
extern void *backfill_agent(void *args)
{
    struct timeval tv1, tv2;
    char tv_str[20];
    time_t now;
    double wait_time;
    static time_t last_backfill_time = 0;
    /* Read config and partitions; Write jobs and nodes */
    slurmctld_lock_t all_locks = {
        READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };

    _load_config();
    last_backfill_time = time(NULL);
    while (!stop_backfill) {
        _my_sleep(backfill_interval);
        if (stop_backfill)
            break;
        if (config_flag) {
            config_flag = false;
            _load_config();
        }
        now = time(NULL);
        wait_time = difftime(now, last_backfill_time);
        if ((wait_time < backfill_interval) ||
            _job_is_completing() || _many_pending_rpcs() ||
            !avail_front_end() || !_more_work(last_backfill_time))
            continue;

        gettimeofday(&tv1, NULL);
        lock_slurmctld(all_locks);
        while (_attempt_backfill()) ;
        last_backfill_time = time(NULL);
        unlock_slurmctld(all_locks);
        gettimeofday(&tv2, NULL);
        _diff_tv_str(&tv1, &tv2, tv_str, 20);
        if (debug_flags & DEBUG_FLAG_BACKFILL)
            info("backfill: completed, %s", tv_str);
    }
    return NULL;
}

Author: donaghy1, Project: slurm, Lines: 41


Example 7: _yield_locks

/* Return non-zero to break the backfill loop if change in job, node or
 * partition state or the backfill scheduler needs to be stopped. */
static int _yield_locks(void)
{
    slurmctld_lock_t all_locks = {
        READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK };
    time_t job_update, node_update, part_update;

    job_update  = last_job_update;
    node_update = last_node_update;
    part_update = last_part_update;

    unlock_slurmctld(all_locks);
    _my_sleep(backfill_interval);
    lock_slurmctld(all_locks);

    if ((last_job_update  == job_update)  &&
        (last_node_update == node_update) &&
        (last_part_update == part_update) &&
        (!stop_backfill) && (!config_flag))
        return 0;
    else
        return 1;
}

Author: donaghy1, Project: slurm, Lines: 24


Example 8: spawn_msg_thread

/*****************************************************************************\
 * spawn message handler thread
\*****************************************************************************/
extern int spawn_msg_thread(void)
{
    pthread_attr_t thread_attr_msg;
    slurm_ctl_conf_t *conf;
    /* Locks: Read configuration */
    slurmctld_lock_t config_read_lock = {
        READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

    lock_slurmctld(config_read_lock);
    conf = slurm_conf_lock();
    sched_port = conf->dynalloc_port;
    slurm_conf_unlock();
    unlock_slurmctld(config_read_lock);

    if (sched_port == 0) {
        error("DynAllocPort == 0, not spawning communication thread");
        return SLURM_ERROR;
    }

    slurm_mutex_lock(&thread_flag_mutex);
    if (thread_running) {
        error("dynalloc thread already running, not starting another");
        slurm_mutex_unlock(&thread_flag_mutex);
        return SLURM_ERROR;
    }

    slurm_attr_init(&thread_attr_msg);
    if (pthread_create(&msg_thread_id, &thread_attr_msg,
                       _msg_thread, NULL))
        fatal("pthread_create %m");
    else
        info("dynalloc: msg thread create successful!");

    slurm_attr_destroy(&thread_attr_msg);
    thread_running = true;
    slurm_mutex_unlock(&thread_flag_mutex);

    return SLURM_SUCCESS;
}

Author: diorsman, Project: slurm, Lines: 42


Example 9: usleep

/* We can not invoke update_job_dependency() until the new job record has
 * been created, hence this sleeping thread modifies the dependent job
 * later. */
static void *_dep_agent(void *args)
{
    struct job_record *job_ptr = (struct job_record *) args;
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
    char *end_ptr = NULL, *tok;
    int cnt = 0;

    usleep(100000);
    lock_slurmctld(job_write_lock);
    if (job_ptr && job_ptr->details && (job_ptr->magic == JOB_MAGIC) &&
        job_ptr->comment && strstr(job_ptr->comment, "on:")) {
        char *new_depend = job_ptr->details->dependency;
        job_ptr->details->dependency = NULL;
        update_job_dependency(job_ptr, new_depend);
        xfree(new_depend);
        tok = strstr(job_ptr->comment, "on:");
        cnt = strtol(tok + 3, &end_ptr, 10);
    }
    if (cnt == 0)
        set_job_prio(job_ptr);
    unlock_slurmctld(job_write_lock);
    return NULL;
}

Author: BYUHPC, Project: slurm, Lines: 27


Example 10: run_backup

/* run_backup - this is the backup controller, it should run in standby
 *    mode, assuming control when the primary controller stops responding */
void run_backup(slurm_trigger_callbacks_t *callbacks)
{
    int i;
    uint32_t trigger_type;
    time_t last_ping = 0;
    pthread_attr_t thread_attr_sig, thread_attr_rpc;
    slurmctld_lock_t config_read_lock = {
        READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
    slurmctld_lock_t config_write_lock = {
        WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK };

    info("slurmctld running in background mode");
    takeover = false;
    last_controller_response = time(NULL);

    /* default: don't resume if shutdown */
    slurmctld_config.resume_backup = false;
    if (xsignal_block(backup_sigarray) < 0)
        error("Unable to block signals");

    /*
     * create attached thread to process RPCs
     */
    slurm_attr_init(&thread_attr_rpc);
    while (pthread_create(&slurmctld_config.thread_id_rpc,
                          &thread_attr_rpc, _background_rpc_mgr, NULL)) {
        error("pthread_create error %m");
        sleep(1);
    }
    slurm_attr_destroy(&thread_attr_rpc);

    /*
     * create attached thread for signal handling
     */
    slurm_attr_init(&thread_attr_sig);
    while (pthread_create(&slurmctld_config.thread_id_sig,
                          &thread_attr_sig, _background_signal_hand,
                          NULL)) {
        error("pthread_create %m");
        sleep(1);
    }
    slurm_attr_destroy(&thread_attr_sig);
    trigger_type = TRIGGER_TYPE_BU_CTLD_RES_OP;
    _trigger_slurmctld_event(trigger_type);

    for (i = 0; ((i < 5) && (slurmctld_config.shutdown_time == 0)); i++) {
        sleep(1);       /* Give the primary slurmctld set-up time */
    }

    /* repeatedly ping ControlMachine */
    while (slurmctld_config.shutdown_time == 0) {
        sleep(1);
        /* Lock of slurmctld_conf below not important */
        if (slurmctld_conf.slurmctld_timeout &&
            (takeover == false) &&
            (difftime(time(NULL), last_ping) <
             (slurmctld_conf.slurmctld_timeout / 3)))
            continue;

        last_ping = time(NULL);
        if (_ping_controller() == 0)
            last_controller_response = time(NULL);
        else if (takeover) {
            /* in takeover mode, take control as soon as */
            /* primary no longer respond */
            break;
        } else {
            uint32_t timeout;
            lock_slurmctld(config_read_lock);
            timeout = slurmctld_conf.slurmctld_timeout;
            unlock_slurmctld(config_read_lock);

            if (difftime(time(NULL), last_controller_response) >
                timeout) {
                break;
            }
        }
    }

    if (slurmctld_config.shutdown_time != 0) {
        /* Since pidfile is created as user root (its owner is
         *   changed to SlurmUser) SlurmUser may not be able to
         *   remove it, so this is not necessarily an error.
         * No longer need slurmctld_conf lock after above join. */
        if (unlink(slurmctld_conf.slurmctld_pidfile) < 0)
            verbose("Unable to remove pidfile '%s': %m",
                    slurmctld_conf.slurmctld_pidfile);

        info("BackupController terminating");
        pthread_join(slurmctld_config.thread_id_sig, NULL);
        log_fini();
        if (dump_core)
            abort();
        else
            exit(0);
    }

    lock_slurmctld(config_read_lock);
    /* ... remainder omitted in the original listing ... */

Author: bingzhang, Project: slurm, Lines: 101


Example 11: slurm_mutex_lock

/* Checkpoint processing pthread
 * Never returns, but is cancelled on plugin termination */
static void *_ckpt_agent_thr(void *arg)
{
    struct ckpt_req *req = (struct ckpt_req *)arg;
    int rc;
    /* Locks: write job */
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
    struct job_record *job_ptr;
    struct step_record *step_ptr;
    struct check_job_info *check_ptr;

    /* only perform ckpt operation of ONE JOB */
    slurm_mutex_lock(&ckpt_agent_mutex);
    while (ckpt_agent_jobid && ckpt_agent_jobid != req->job_id) {
        pthread_cond_wait(&ckpt_agent_cond, &ckpt_agent_mutex);
    }
    ckpt_agent_jobid = req->job_id;
    ckpt_agent_count++;
    slurm_mutex_unlock(&ckpt_agent_mutex);

    debug3("checkpoint/blcr: sending checkpoint tasks request %u to %u.%u",
           req->op, req->job_id, req->step_id);

    rc = checkpoint_tasks(req->job_id, req->step_id, req->begin_time,
                          req->image_dir, req->wait, req->nodelist);
    if (rc != SLURM_SUCCESS) {
        error("checkpoint/blcr: error on checkpoint request %u to "
              "%u.%u: %s", req->op, req->job_id, req->step_id,
              slurm_strerror(rc));
    }
    if (req->op == CHECK_REQUEUE)
        _requeue_when_finished(req->job_id);

    lock_slurmctld(job_write_lock);
    job_ptr = find_job_record(req->job_id);
    if (!job_ptr) {
        error("_ckpt_agent_thr: job finished");
        goto out;
    }
    if (req->step_id == SLURM_BATCH_SCRIPT) {    /* batch job */
        check_ptr = (struct check_job_info *)job_ptr->check_job;
    } else {
        step_ptr = find_step_record(job_ptr, req->step_id);
        if (!step_ptr) {
            error("_ckpt_agent_thr: step finished");
            goto out;
        }
        check_ptr = (struct check_job_info *)step_ptr->check_job;
    }
    check_ptr->time_stamp = 0;
    check_ptr->error_code = rc;
    if (check_ptr->error_code != SLURM_SUCCESS)
        check_ptr->error_msg = xstrdup(slurm_strerror(rc));

 out:
    unlock_slurmctld(job_write_lock);

    if (req->sig_done) {
        _send_sig(req->job_id, req->step_id, req->sig_done,
                  req->nodelist);
    }

    _on_ckpt_complete(req->gid, req->uid, req->job_id, req->step_id,
                      req->image_dir, rc);

    slurm_mutex_lock(&ckpt_agent_mutex);
    ckpt_agent_count--;
    if (ckpt_agent_count == 0) {
        ckpt_agent_jobid = 0;
        pthread_cond_broadcast(&ckpt_agent_cond);
    }
    slurm_mutex_unlock(&ckpt_agent_mutex);
    _ckpt_req_free(req);
    return NULL;
}

Author: Cray, Project: slurm, Lines: 77


Example 12: job_modify_wiki

/* ... beginning of function omitted in the original listing ... */
    if (feature_ptr) {
        feature_ptr[9] = ':';
        feature_ptr += 10;
        null_term(feature_ptr);
    }
    if (gres_ptr) {
        gres_ptr[4] = ':';
        gres_ptr += 5;
        null_term(gres_ptr);
    }
    if (host_ptr) {
        host_ptr[8] = ':';
        host_ptr += 9;
        null_term(host_ptr);
    }
    if (name_ptr) {
        name_ptr[7] = ':';
        name_ptr += 8;
        if (name_ptr[0] == '\"') {
            name_ptr++;
            for (i = 0; ; i++) {
                if (name_ptr[i] == '\0')
                    break;
                if (name_ptr[i] == '\"') {
                    name_ptr[i] = '\0';
                    break;
                }
            }
        } else if (name_ptr[0] == '\'') {
            name_ptr++;
            for (i = 0; ; i++) {
                if (name_ptr[i] == '\0')
                    break;
                if (name_ptr[i] == '\'') {
                    name_ptr[i] = '\0';
                    break;
                }
            }
        } else
            null_term(name_ptr);
    }
    if (start_ptr) {
        start_ptr[12] = ':';
        start_ptr += 13;
        null_term(start_ptr);
    }
    if (nodes_ptr) {
        nodes_ptr[5] = ':';
        nodes_ptr += 6;
        new_node_cnt = strtoul(nodes_ptr, NULL, 10);
    }
    if (part_ptr) {
        part_ptr[9] = ':';
        part_ptr += 10;
        null_term(part_ptr);
    }
    if (time_ptr) {
        time_ptr[9] = ':';
        time_ptr += 10;
        new_time_limit = strtoul(time_ptr, NULL, 10);
    }
    if (env_ptr) {
        env_ptr[12] = ':';
        env_ptr += 13;
        null_term(env_ptr);
    }
    if (wckey_ptr) {
        wckey_ptr[5] = ':';
        wckey_ptr += 6;
        null_term(wckey_ptr);
    }

    /* Look for any un-parsed "=" ignoring anything after VARIABLELIST
     * which is expected to contain "=" in its value */
    tmp_char = strchr(cmd_ptr, '=');
    if (tmp_char && (!env_ptr || (env_ptr > tmp_char))) {
        tmp_char[0] = '\0';
        while (tmp_char[-1] && (!isspace(tmp_char[-1])))
            tmp_char--;
        error("wiki: Invalid MODIFYJOB option %s", tmp_char);
    }

    lock_slurmctld(job_write_lock);
    slurm_rc = _job_modify(jobid, bank_ptr, depend_ptr, host_ptr,
                           new_node_cnt, part_ptr, new_time_limit, name_ptr,
                           start_ptr, feature_ptr, env_ptr, comment_ptr,
                           gres_ptr, wckey_ptr);
    unlock_slurmctld(job_write_lock);
    if (slurm_rc != SLURM_SUCCESS) {
        *err_code = -700;
        *err_msg = slurm_strerror(slurm_rc);
        error("wiki: Failed to modify job %u (%m)", jobid);
        return -1;
    }

    snprintf(reply_msg, sizeof(reply_msg),
             "job %u modified successfully", jobid);
    *err_msg = reply_msg;
    return 0;
}

Author: Xarthisius, Project: slurm, Lines: 101


Example 13: job_modify_wiki

/* Modify a job:
 *    CMD=MODIFYJOB ARG=<jobid> PARTITION=<name> NODES=<number>
 *        DEPEND=afterany:<jobid> TIMELIMIT=<seconds> BANK=<name>
 * RET 0 on success, -1 on failure */
extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg)
{
    char *arg_ptr, *bank_ptr, *depend_ptr, *nodes_ptr;
    char *host_ptr, *part_ptr, *time_ptr, *tmp_char;
    int slurm_rc;
    uint32_t jobid, new_node_cnt = 0, new_time_limit = 0;
    static char reply_msg[128];
    /* Locks: write job, read node and partition info */
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK, NO_LOCK };

    arg_ptr = strstr(cmd_ptr, "ARG=");
    if (arg_ptr == NULL) {
        *err_code = -300;
        *err_msg = "MODIFYJOB lacks ARG=";
        error("wiki: MODIFYJOB lacks ARG=");
        return -1;
    }
    /* Change all parsed "=" to ":" then search for remaining "="
     * and report results as unrecognized options */
    arg_ptr[3] = ':';
    arg_ptr += 4;
    jobid = strtoul(arg_ptr, &tmp_char, 10);
    if ((tmp_char[0] != '\0') && (!isspace(tmp_char[0]))) {
        *err_code = -300;
        *err_msg = "Invalid ARG value";
        error("wiki: MODIFYJOB has invalid jobid");
        return -1;
    }
    bank_ptr   = strstr(cmd_ptr, "BANK=");
    depend_ptr = strstr(cmd_ptr, "DEPEND=");
    host_ptr   = strstr(cmd_ptr, "HOSTLIST=");
    nodes_ptr  = strstr(cmd_ptr, "NODES=");
    part_ptr   = strstr(cmd_ptr, "PARTITION=");
    time_ptr   = strstr(cmd_ptr, "TIMELIMIT=");
    if (bank_ptr) {
        bank_ptr[4] = ':';
        bank_ptr += 5;
        null_term(bank_ptr);
    }
    if (depend_ptr) {
        depend_ptr[6] = ':';
        depend_ptr += 7;
        null_term(depend_ptr);
    }
    if (host_ptr) {
        host_ptr[8] = ':';
        host_ptr += 9;
        null_term(host_ptr);
    }
    if (nodes_ptr) {
        nodes_ptr[5] = ':';
        nodes_ptr += 6;
        new_node_cnt = strtoul(nodes_ptr, NULL, 10);
    }
    if (part_ptr) {
        part_ptr[9] = ':';
        part_ptr += 10;
        null_term(part_ptr);
    }
    if (time_ptr) {
        time_ptr[9] = ':';
        time_ptr += 10;
        new_time_limit = strtoul(time_ptr, NULL, 10);
    }

    /* Look for any un-parsed "=" */
    tmp_char = strchr(cmd_ptr, '=');
    if (tmp_char) {
        tmp_char[0] = '\0';
        while (tmp_char[-1] && (!isspace(tmp_char[-1])))
            tmp_char--;
        error("wiki: Invalid MODIFYJOB option %s", tmp_char);
    }

    lock_slurmctld(job_write_lock);
    slurm_rc = _job_modify(jobid, bank_ptr, depend_ptr, host_ptr,
                           new_node_cnt, part_ptr, new_time_limit);
    unlock_slurmctld(job_write_lock);
    if (slurm_rc != SLURM_SUCCESS) {
        *err_code = -700;
        *err_msg = slurm_strerror(slurm_rc);
        error("wiki: Failed to modify job %u (%m)", jobid);
        return -1;
    }

    snprintf(reply_msg, sizeof(reply_msg),
             "job %u modified successfully", jobid);
    *err_msg = reply_msg;
    return 0;
}

Author: rathamahata, Project: slurm, Lines: 95


Example 14: debug

/*
 * init_power_save - Initialize the power save module. Started as a
 *    pthread. Terminates automatically at slurmctld shutdown time.
 *    Input and output are unused.
 */
static void *_init_power_save(void *arg)
{
    /* Locks: Read nodes */
    slurmctld_lock_t node_read_lock = {
        NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
    /* Locks: Write nodes */
    slurmctld_lock_t node_write_lock = {
        NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
    time_t now, boot_time = 0, last_power_scan = 0;

    if (power_save_config && !power_save_enabled) {
        debug("power_save mode not enabled");
        return NULL;
    }

    suspend_node_bitmap = bit_alloc(node_record_count);
    resume_node_bitmap  = bit_alloc(node_record_count);

    while (slurmctld_config.shutdown_time == 0) {
        sleep(1);

        if (_reap_procs() < 2) {
            debug("power_save programs getting backlogged");
            continue;
        }

        if ((last_config != slurmctld_conf.last_update) &&
            (_init_power_config())) {
            info("power_save mode has been disabled due to "
                 "configuration changes");
            goto fini;
        }

        now = time(NULL);
        if (boot_time == 0)
            boot_time = now;

        /* Only run every 60 seconds or after a node state change,
         * whichever happens first */
        if ((last_node_update >= last_power_scan) ||
            (now >= (last_power_scan + 60))) {
            lock_slurmctld(node_write_lock);
            _do_power_work(now);
            unlock_slurmctld(node_write_lock);
            last_power_scan = now;
        }

        if (slurmd_timeout &&
            (now > (boot_time + (slurmd_timeout / 2)))) {
            lock_slurmctld(node_read_lock);
            _re_wake();
            unlock_slurmctld(node_read_lock);
            /* prevent additional executions */
            boot_time += (365 * 24 * 60 * 60);
            slurmd_timeout = 0;
        }
    }

fini:
    _clear_power_config();
    FREE_NULL_BITMAP(suspend_node_bitmap);
    FREE_NULL_BITMAP(resume_node_bitmap);
    _shutdown_power();
    slurm_mutex_lock(&power_mutex);
    power_save_enabled = false;
    pthread_cond_signal(&power_cond);
    slurm_mutex_unlock(&power_mutex);
    pthread_exit(NULL);
    return NULL;
}

Author: edsw, Project: slurm, Lines: 74


Example 15: _start_job

/* ... beginning of function omitted in the original listing ... */
        job_ptr->details->req_node_layout = (uint16_t *)
            xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
        bsize = bit_size(new_bitmap);
        for (i = 0, ll = -1; i < bsize; i++) {
            if (!bit_test(new_bitmap, i))
                continue;
            ll++;
            node_name = node_record_table_ptr[i].name;
            node_name_len  = strlen(node_name);
            if (node_name_len == 0)
                continue;
            node_cur = tasklist;
            while (*node_cur) {
                if ((node_idx = strstr(node_cur, node_name))) {
                    if ((node_idx[node_name_len] == ',') ||
                        (node_idx[node_name_len] == '\0')) {
                        job_ptr->details->
                            req_node_layout[ll] +=
                            cpus_per_task;
                    }
                    node_cur = strchr(node_idx, ',');
                    if (node_cur)
                        continue;
                }
                break;
            }
        }
    }

    /* save and update job state to start now */
    save_req_nodes = job_ptr->details->req_nodes;
    job_ptr->details->req_nodes = new_node_list;
    save_req_bitmap = job_ptr->details->req_node_bitmap;
    job_ptr->details->req_node_bitmap = new_bitmap;
    old_task_cnt = job_ptr->details->min_cpus;
    job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt);
    job_ptr->priority = 100000000;

 fini:
    unlock_slurmctld(job_write_lock);

    if (rc)
        return rc;

    /* No errors so far */
    (void) schedule(INFINITE);    /* provides own locking */

    /* Check to insure the job was actually started */
    lock_slurmctld(job_write_lock);
    if (job_ptr->job_id != jobid)
        job_ptr = find_job_record(jobid);

    if (job_ptr && (job_ptr->job_id == jobid) &&
        (!IS_JOB_RUNNING(job_ptr))) {
        uint16_t wait_reason = 0;
        char *wait_string;

        if (IS_JOB_FAILED(job_ptr))
            wait_string = "Invalid request, job aborted";
        else {
            wait_reason = job_ptr->state_reason;
            if (wait_reason == WAIT_HELD) {
                /* some job is completing, slurmctld did
                 * not even try to schedule this job */
                wait_reason = WAIT_RESOURCES;
            }
            wait_string = job_reason_string(wait_reason);
            job_ptr->state_reason = WAIT_HELD;
            xfree(job_ptr->state_desc);
        }
        *err_code = -910 - wait_reason;
        snprintf(tmp_msg, sizeof(tmp_msg),
                 "Could not start job %u(%s): %s",
                 jobid, new_node_list, wait_string);
        *err_msg = tmp_msg;
        error("wiki: %s", tmp_msg);

        /* restore some of job state */
        job_ptr->priority = 0;
        job_ptr->details->min_cpus = old_task_cnt;
        rc = -1;
    }

    if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) {
        /* Restore required node list in case job requeued */
        xfree(job_ptr->details->req_nodes);
        job_ptr->details->req_nodes = save_req_nodes;
        FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
        job_ptr->details->req_node_bitmap = save_req_bitmap;
        FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
        xfree(job_ptr->details->req_node_layout);
    } else {
        error("wiki: start_job(%u) job missing", jobid);
        xfree(save_req_nodes);
        FREE_NULL_BITMAP(save_req_bitmap);
    }

    unlock_slurmctld(job_write_lock);
    schedule_node_save();    /* provides own locking */
    schedule_job_save();     /* provides own locking */

    return rc;
}

Author: VURM, Project: slurm, Lines: 101


Example 16: dump_all_front_end_state

/* dump_all_front_end_state - save the state of all front_end nodes to file */
extern int dump_all_front_end_state(void)
{
#ifdef HAVE_FRONT_END
    /* Save high-water mark to avoid buffer growth with copies */
    static int high_buffer_size = (1024 * 1024);
    int error_code = 0, i, log_fd;
    char *old_file, *new_file, *reg_file;
    front_end_record_t *front_end_ptr;
    /* Locks: Read config and node */
    slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK,
                                        NO_LOCK };
    Buf buffer = init_buf(high_buffer_size);
    DEF_TIMERS;

    START_TIMER;
    /* write header: version, time */
    packstr(FRONT_END_STATE_VERSION, buffer);
    pack_time(time(NULL), buffer);

    /* write node records to buffer */
    lock_slurmctld(node_read_lock);
    for (i = 0, front_end_ptr = front_end_nodes;
         i < front_end_node_cnt; i++, front_end_ptr++) {
        xassert(front_end_ptr->magic == FRONT_END_MAGIC);
        _dump_front_end_state(front_end_ptr, buffer);
    }
    old_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(old_file, "/front_end_state.old");
    reg_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(reg_file, "/front_end_state");
    new_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(new_file, "/front_end_state.new");
    unlock_slurmctld(node_read_lock);

    /* write the buffer to file */
    lock_state_files();
    log_fd = creat(new_file, 0600);
    if (log_fd < 0) {
        error("Can't save state, error creating file %s %m", new_file);
        error_code = errno;
    } else {
        int pos = 0, nwrite = get_buf_offset(buffer), amount, rc;
        char *data = (char *)get_buf_data(buffer);
        high_buffer_size = MAX(nwrite, high_buffer_size);
        while (nwrite > 0) {
            amount = write(log_fd, &data[pos], nwrite);
            if ((amount < 0) && (errno != EINTR)) {
                error("Error writing file %s, %m", new_file);
                error_code = errno;
                break;
            }
            nwrite -= amount;
            pos    += amount;
        }
        rc = fsync_and_close(log_fd, "front_end");
        if (rc && !error_code)
            error_code = rc;
    }
    if (error_code)
        (void) unlink(new_file);
    else {    /* file shuffle */
        (void) unlink(old_file);
        if (link(reg_file, old_file))
            debug4("unable to create link for %s -> %s: %m",
                   reg_file, old_file);
        (void) unlink(reg_file);
        if (link(new_file, reg_file))
            debug4("unable to create link for %s -> %s: %m",
                   new_file, reg_file);
        (void) unlink(new_file);
    }
    xfree(old_file);
    xfree(reg_file);
    xfree(new_file);
    unlock_state_files();

    free_buf(buffer);
    END_TIMER2("dump_all_front_end_state");
    return error_code;
#else
    return SLURM_SUCCESS;
#endif
}

Author: Cray, Project: slurm, Lines: 87


Example 17: pthread_setcancelstate

/* _background_rpc_mgr - Read and process incoming RPCs to the background
 *    controller (that's us) */
static void *_background_rpc_mgr(void *no_data)
{
    slurm_fd_t newsockfd;
    slurm_fd_t sockfd;
    slurm_addr_t cli_addr;
    slurm_msg_t *msg = NULL;
    int error_code;
    char *node_addr = NULL;

    /* Read configuration only */
    slurmctld_lock_t config_read_lock = {
        READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
    int sigarray[] = {SIGUSR1, 0};

    (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
    (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
    debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid());

    /* initialize port for RPCs */
    lock_slurmctld(config_read_lock);

    /* set node_addr to bind to (NULL means any) */
    if ((strcmp(slurmctld_conf.backup_controller,
                slurmctld_conf.backup_addr) != 0)) {
        node_addr = slurmctld_conf.backup_addr;
    }

    if ((sockfd =
         slurm_init_msg_engine_addrname_port(node_addr,
                                             slurmctld_conf.
                                             slurmctld_port))
        == SLURM_SOCKET_ERROR)
        fatal("slurm_init_msg_engine_addrname_port error %m");
    unlock_slurmctld(config_read_lock);

    /* Prepare to catch SIGUSR1 to interrupt accept().
     * This signal is generated by the slurmctld signal
     * handler thread upon receipt of SIGABRT, SIGINT,
     * or SIGTERM. That thread does all processing of
     * all signals. */
    xsignal(SIGUSR1, _sig_handler);
    xsignal_unblock(sigarray);

    /*
     * Process incoming RPCs indefinitely
     */
    while (slurmctld_config.shutdown_time == 0) {
        /* accept needed for stream implementation
         * is a no-op in message implementation that just passes
         * sockfd to newsockfd */
        if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr))
            == SLURM_SOCKET_ERROR) {
            if (errno != EINTR)
                error("slurm_accept_msg_conn: %m");
            continue;
        }

        msg = xmalloc(sizeof(slurm_msg_t));
        slurm_msg_t_init(msg);
        if (slurm_receive_msg(newsockfd, msg, 0) != 0)
            error("slurm_receive_msg: %m");

        error_code = _background_process_msg(msg);
        if ((error_code == SLURM_SUCCESS) &&
            (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE) &&
            (slurmctld_config.shutdown_time == 0))
            slurmctld_config.shutdown_time = time(NULL);

        slurm_free_msg_data(msg->msg_type, msg->data);
        slurm_free_msg(msg);
        slurm_close(newsockfd);    /* close new socket */
    }

    debug3("_background_rpc_mgr shutting down");
    slurm_close(sockfd);    /* close the main socket */
    pthread_exit((void *) 0);
    return NULL;
}

Author: bingzhang, Project: slurm, Lines: 81


Example 18: _start_agent

/* ... beginning of function omitted in the original listing ... */
                     bg_record->bg_block_id,
                     RM_MODIFY_Options,
                     conn_type)) != SLURM_SUCCESS)
                error("bridge_set_data(RM_MODIFY_Options): %s",
                      bg_err_str(rc));
        }
#endif
        if ((rc = bridge_block_modify(bg_record->bg_block_id,
                                      RM_MODIFY_MloaderImg,
                                      bg_record->mloaderimage))
            != SLURM_SUCCESS)
            error("bridge_block_modify(RM_MODIFY_MloaderImg): %s",
                  bg_err_str(rc));
#endif
        bg_record->modifying = 0;
    }

no_reboot:
    if (bg_record->state == BG_BLOCK_FREE) {
        if ((rc = bridge_block_boot(bg_record)) != SLURM_SUCCESS) {
            char reason[200];

            bg_record->boot_state = 0;
            bg_record->boot_count = 0;

            if (rc == BG_ERROR_INVALID_STATE)
                snprintf(reason, sizeof(reason),
                         "Block %s is in an incompatible "
                         "state.  This usually means "
                         "hardware is allocated "
                         "by another block (maybe outside "
                         "of SLURM).",
                         bg_record->bg_block_id);
            else
                snprintf(reason, sizeof(reason),
                         "Couldn't boot block %s: %s",
                         bg_record->bg_block_id,
                         bg_err_str(rc));
            slurm_mutex_unlock(&block_state_mutex);
            requeue_and_error(bg_record, reason);
            return;
        }
    } else if (bg_record->state == BG_BLOCK_BOOTING) {
#ifdef HAVE_BG_FILES
        bg_record->boot_state = 1;
#else
        if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
            list_push(bg_lists->booted, bg_record);
        bg_record->state = BG_BLOCK_INITED;
        last_bg_update = time(NULL);
#endif
    }

    if ((bg_record->job_running <= NO_JOB_RUNNING)
        && !find_job_in_bg_record(bg_record, req_job_id)) {
        slurm_mutex_unlock(&block_state_mutex);
        debug("job %u finished during the start of the boot "
              "(everything is ok)",
              req_job_id);
        return;
    }

    /* Don't reset boot_count, it will be reset when state
     * changes, and needs to outlast a job allocation. */
    /* bg_record->boot_count = 0; */
    if (bg_record->state == BG_BLOCK_INITED) {
        debug("block %s is already ready.", bg_record->bg_block_id);
        /* Just in case reset the boot flags */
        bg_record->boot_state = 0;
        bg_record->boot_count = 0;
        set_user_rc = bridge_block_sync_users(bg_record);
        block_inited = 1;
    }
    slurm_mutex_unlock(&block_state_mutex);

    /* This lock needs to happen after the block_state_mutex to
     * avoid deadlock. */
    if (block_inited && bg_action_ptr->job_ptr) {
        slurmctld_lock_t job_write_lock = {
            NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
        lock_slurmctld(job_write_lock);
        bg_action_ptr->job_ptr->job_state &= (~JOB_CONFIGURING);
        last_job_update = time(NULL);
        unlock_slurmctld(job_write_lock);
    }

    if (set_user_rc == SLURM_ERROR) {
        sleep(2);
        /* wait for the slurmd to begin
         * the batch script; slurm_fail_job()
         * is a no-op if issued prior
         * to the script initiation, so clean up just
         * in case the fail job isn't run */
        (void) slurm_fail_job(req_job_id, JOB_BOOT_FAIL);
    }
}

Author: HPCNow, Project: slurm, Lines: 101


Example 19: _notify_slurmctld_nodes

static void _notify_slurmctld_nodes(agent_info_t *agent_ptr,
                                    int no_resp_cnt, int retry_cnt)
{
    ListIterator itr = NULL;
    ret_data_info_t *ret_data_info = NULL;
    state_t state;
    int is_ret_list = 1;
    /* Locks: Read config, write job, write node */
    slurmctld_lock_t node_write_lock =
        { READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
    thd_t *thread_ptr = agent_ptr->thread_struct;
    int i;

    /* Notify slurmctld of non-responding nodes */
    if (no_resp_cnt) {
        /* Update node table data for non-responding nodes */
        lock_slurmctld(node_write_lock);
        if (agent_ptr->msg_type == REQUEST_BATCH_JOB_LAUNCH) {
            /* Requeue the request */
            batch_job_launch_msg_t *launch_msg_ptr =
                    *agent_ptr->msg_args_pptr;
            uint32_t job_id = launch_msg_ptr->job_id;
            job_complete(job_id, 0, true, false, 0);
        }
        unlock_slurmctld(node_write_lock);
    }
    if (retry_cnt && agent_ptr->retry)
        _queue_agent_retry(agent_ptr, retry_cnt);

    /* Update last_response on responding nodes */
    lock_slurmctld(node_write_lock);
    for (i = 0; i < agent_ptr->thread_count; i++) {
        char *down_msg, *node_names;
        if (!thread_ptr[i].ret_list) {
            state = thread_ptr[i].state;
            is_ret_list = 0;
            goto switch_on_state;
        }
        is_ret_list = 1;

        itr = list_iterator_create(thread_ptr[i].ret_list);
        while ((ret_data_info = list_next(itr))) {
            state = ret_data_info->err;
        switch_on_state:
            switch (state) {
            case DSH_NO_RESP:
                if (!is_ret_list) {
                    node_not_resp(thread_ptr[i].nodelist,
                                  thread_ptr[i].
                                  start_time);
                } else {
                    node_not_resp(ret_data_info->node_name,
                                  thread_ptr[i].start_time);
                }
                break;
            case DSH_FAILED:
                if (is_ret_list)
                    node_names = ret_data_info->node_name;
                else
                    node_names = thread_ptr[i].nodelist;
#ifdef HAVE_FRONT_END
                down_msg = "";
#else
                set_node_down(node_names,
                              "Prolog/Epilog failure");
                down_msg = ", set to state DOWN";
#endif
                error("Prolog/Epilog failure on nodes %s%s",
                      node_names, down_msg);
                break;
            case DSH_DONE:
                if (!is_ret_list)
                    node_did_resp(thread_ptr[i].nodelist);
                else
                    node_did_resp(ret_data_info->node_name);
                break;
            default:
                if (!is_ret_list) {
                    error("unknown state returned for %s",
                          thread_ptr[i].nodelist);
                } else {
                    error("unknown state returned for %s",
                          ret_data_info->node_name);
                }
                break;
            }
            if (!is_ret_list)
                goto finished;
        }
        list_iterator_destroy(itr);
finished:    ;
    }
    unlock_slurmctld(node_write_lock);

    if (run_scheduler) {
        run_scheduler = false;
        /* below functions all have their own locking */
        if (schedule(0)) {
            schedule_job_save();
            schedule_node_save();
        }
/* ... remainder omitted in the original listing ... */

Author: alepharchives, Project: slurm, Lines: 101


Example 20: xassert

/* ... beginning of function omitted in the original listing ... */
                goto cleanup;
            }
        }
        //info("sending %u to %s", msg_type, thread_ptr->nodelist);
        if (slurm_send_only_node_msg(&msg) == SLURM_SUCCESS) {
            thread_state = DSH_DONE;
        } else {
            if (!srun_agent)
                _comm_err(thread_ptr->nodelist, msg_type);
        }
        goto cleanup;
    }

    //info("got %d messages back", list_count(ret_list));
    found = 0;
    itr = list_iterator_create(ret_list);
    while ((ret_data_info = list_next(itr)) != NULL) {
        rc = slurm_get_return_code(ret_data_info->type,
                                   ret_data_info->data);
        /* SPECIAL CASE: Mark node as IDLE if job already
         * complete */
        if (is_kill_msg &&
            (rc == ESLURMD_KILL_JOB_ALREADY_COMPLETE)) {
            kill_job_msg_t *kill_job;
            kill_job = (kill_job_msg_t *)
                task_ptr->msg_args_ptr;
            rc = SLURM_SUCCESS;
            lock_slurmctld(job_write_lock);
            if (job_epilog_complete(kill_job->job_id,
                                    ret_data_info->
                                    node_name,
                                    rc))
                run_scheduler = true;
            unlock_slurmctld(job_write_lock);
        }

        /* SPECIAL CASE: Kill non-startable batch job,
         * Requeue the job on ESLURMD_PROLOG_FAILED */
        if ((msg_type == REQUEST_BATCH_JOB_LAUNCH) &&
            (rc != SLURM_SUCCESS) && (rc != ESLURMD_PROLOG_FAILED) &&
            (ret_data_info->type != RESPONSE_FORWARD_FAILED)) {
            batch_job_launch_msg_t *launch_msg_ptr =
                task_ptr->msg_args_ptr;
            uint32_t job_id = launch_msg_ptr->job_id;
            info("Killing non-startable batch job %u: %s",
                 job_id, slurm_strerror(rc));
            thread_state = DSH_DONE;
            ret_data_info->err = thread_state;
            lock_slurmctld(job_write_lock);
            job_complete(job_id, 0, false, false, _wif_status());
            unlock_slurmctld(job_write_lock);
            continue;
        }

        if (((msg_type == REQUEST_SIGNAL_TASKS) ||
             (msg_type == REQUEST_TERMINATE_TASKS)) &&
            (rc == ESRCH)) {
            /* process is already dead, not a real error */
            rc = SLURM_SUCCESS;
        }

        switch (rc) {
        case SLURM_SUCCESS:
            /* debug("agent processed RPC to node %s", */
            /*       ret_data_info->node_name); */
            thread_state = DSH_DONE;
            break;
/* ... remainder omitted in the original listing ... */

Author: alepharchives, Project: slurm, Lines: 67


Example 21: dump_all_part_state

/* dump_all_part_state - save the state of all partitions to file */
int dump_all_part_state(void)
{
    /* Save high-water mark to avoid buffer growth with copies */
    static int high_buffer_size = BUF_SIZE;
    ListIterator part_iterator;
    struct part_record *part_ptr;
    int error_code = 0, log_fd;
    char *old_file, *new_file, *reg_file;
    /* Locks: Read partition */
    slurmctld_lock_t part_read_lock =
        { READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK };
    Buf buffer = init_buf(high_buffer_size);
    DEF_TIMERS;

    START_TIMER;
    /* write header: time */
    packstr(PART_STATE_VERSION, buffer);
    pack_time(time(NULL), buffer);

    /* write partition records to buffer */
    lock_slurmctld(part_read_lock);
    part_iterator = list_iterator_create(part_list);
    while ((part_ptr = (struct part_record *) list_next(part_iterator))) {
        xassert(part_ptr->magic == PART_MAGIC);
        _dump_part_state(part_ptr, buffer);
    }
    list_iterator_destroy(part_iterator);

    old_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(old_file, "/part_state.old");
    reg_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(reg_file, "/part_state");
    new_file = xstrdup(slurmctld_conf.state_save_location);
    xstrcat(new_file, "/part_state.new");
    unlock_slurmctld(part_read_lock);

    /* write the buffer to file */
    lock_state_files();
    log_fd = creat(new_file, 0600);
    if (log_fd < 0) {
        error("Can't save state, error creating file %s, %m",
              new_file);
        error_code = errno;
    } else {
        int pos = 0, nwrite = get_buf_offset(buffer), amount, rc;
        char *data = (char *)get_buf_data(buffer);
        high_buffer_size = MAX(nwrite, high_buffer_size);
        while (nwrite > 0) {
            amount = write(log_fd, &data[pos], nwrite);
            if ((amount < 0) && (errno != EINTR)) {
                error("Error writing file %s, %m", new_file);
                error_code = errno;
                break;
            }
            nwrite -= amount;
            pos    += amount;
        }
        rc = fsync_and_close(log_fd, "partition");
        if (rc && !error_code)
            error_code = rc;
    }
    if (error_code)
        (void) unlink(new_file);
    else {    /* file shuffle */
        (void) unlink(old_file);
        if (link(reg_file, old_file)) {
            debug4("unable to create link for %s -> %s: %m",
                   reg_file, old_file);
        }
        (void) unlink(reg_file);
        if (link(new_file, reg_file)) {
            debug4("unable to create link for %s -> %s: %m",
                   new_file, reg_file);
        }
        (void) unlink(new_file);
    }
    xfree(old_file);
    xfree(reg_file);
    xfree(new_file);
    unlock_state_files();

    free_buf(buffer);
    END_TIMER2("dump_all_part_state");
    return 0;
}

Author: kwangiit, Project: SLURMPP, Lines: 87


Example 22: job_will_run2

/* ... preceding doc comment omitted in the original listing ... */
extern int job_will_run2(char *cmd_ptr, int *err_code, char **err_msg)
{
    char *arg_ptr, *buf, *tmp_buf, *tmp_char;
    int preemptee_cnt = 0;
    uint32_t jobid, *preemptee = NULL, tmp_id;
    time_t start_time;
    char *avail_nodes = NULL;
    /* Locks: write job, read node and partition info */
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };

    arg_ptr = strstr(cmd_ptr, "ARG=");
    if (arg_ptr == NULL) {
        *err_code = -300;
        *err_msg = "JOBWILLRUN lacks ARG";
        error("wiki: JOBWILLRUN lacks ARG");
        return -1;
    }
    arg_ptr += 4;
    jobid = strtoul(arg_ptr, &tmp_char, 10);
    if ((tmp_char[0] != ' ') && (tmp_char[0] != '\0')) {
        *err_code = -300;
        *err_msg = "Invalid ARG value";
        error("wiki: JOBWILLRUN has invalid ARG value");
        return -1;
    }

    arg_ptr = strstr(cmd_ptr, "STARTTIME=");
    if (arg_ptr) {
        arg_ptr += 10;
        start_time = strtoul(arg_ptr, &tmp_char, 10);
        if ((tmp_char[0] != ' ') && (tmp_char[0] != '\0')) {
            *err_code = -300;
            *err_msg = "Invalid STARTTIME value";
            error("wiki: JOBWILLRUN has invalid STARTTIME value");
            return -1;
        }
    } else {
        start_time = time(NULL);
    }

    arg_ptr = strstr(cmd_ptr, "PREEMPT=");
    if (arg_ptr) {
        arg_ptr += 8;
        preemptee = xmalloc(sizeof(uint32_t) * strlen(arg_ptr));
        while (1) {
            tmp_id = strtoul(arg_ptr, &tmp_char, 10);
            if ((tmp_char[0] != ' ') && (tmp_char[0] != '\0') &&
                (tmp_char[0] != ',')) {
                *err_code = -300;
                *err_msg = "Invalid PREEMPT value";
                error("wiki: JOBWILLRUN has invalid PREEMPT "
                      "value");
                xfree(preemptee);
                xfree(avail_nodes);
                return -1;
            }
            preemptee[preemptee_cnt++] = tmp_id;
            if (tmp_char[0] != ',')
                break;
            arg_ptr = tmp_char + 1;
        }
    }

    /* Keep this last, since we modify the input string */
    arg_ptr = strstr(cmd_ptr, "NODES=");
    if (arg_ptr) {
        arg_ptr += 6;
        avail_nodes = xstrdup(arg_ptr);
        arg_ptr = strchr(avail_nodes, ' ');
        if (arg_ptr)
            arg_ptr[0] = '\0';
    } else {
        *err_code = -300;
        *err_msg = "Missing NODES value";
        error("wiki: JOBWILLRUN lacks NODES value");
        xfree(preemptee);
        return -1;
    }

    lock_slurmctld(job_write_lock);
    buf = _will_run_test2(jobid, start_time, avail_nodes,
                          preemptee, preemptee_cnt,
                          err_code, err_msg);
    unlock_slurmctld(job_write_lock);
    xfree(preemptee);
    xfree(avail_nodes);
    if (!buf)
        return -1;

    tmp_buf = xmalloc(strlen(buf) + 32);
    sprintf(tmp_buf, "SC=0 ARG=%s", buf);
    xfree(buf);
    *err_code = 0;
    *err_msg = tmp_buf;
    return 0;
}

Author: beninim, Project: slurm_simulator, Lines: 101


Example 23: _proc_msg

static void _proc_msg(int new_fd, char *msg, slurm_addr_t cli_addr)
{
    /* Locks: Read job and node data */
    slurmctld_lock_t job_read_lock = {
        NO_LOCK, READ_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
    /* Locks: Write job */
    slurmctld_lock_t job_write_lock = {
        NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
    /* Locks: Write job, write node, read partition */
    slurmctld_lock_t job_write_lock2 = {
        NO_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
    /* Locks: Write node data */
    slurmctld_lock_t node_write_lock = {
        NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK, READ_LOCK };
    char *cmd_ptr, *resp = NULL, *msg_decrypted = NULL;
    uid_t cmd_uid;
    uint32_t protocol_version = 0;

    if (!msg) {
        info("slurmctld/nonstop: NULL message received");
        resp = xstrdup("Error:\"NULL message received\"");
        goto send_resp;
    }

    msg_decrypted = _decrypt(msg, &cmd_uid);
    if (!msg_decrypted) {
        info("slurmctld/nonstop: Message decrypt failure");
        resp = xstrdup("Error:\"Message decrypt failure\"");
        goto send_resp;
    }
    if (nonstop_debug > 0)
        info("slurmctld/nonstop: msg decrypted:%s", msg_decrypted);
    cmd_ptr = msg_decrypted;

    /* 123456789012345678901234567890 */
    if (xstrncmp(cmd_ptr, version_string, 13) == 0) {
        cmd_ptr = strchr(cmd_ptr + 13, ':');
        if (cmd_ptr) {
            cmd_ptr++;
            protocol_version = SLURM_PROTOCOL_VERSION;
        }
    }
    if (protocol_version == 0) {
        info("slurmctld/nonstop: Message version invalid");
        resp = xstrdup("Error:\"Message version invalid\"");
        goto send_resp;
    }
    if (xstrncmp(cmd_ptr, "CALLBACK:JOBID:", 15) == 0) {
        resp = register_callback(cmd_ptr, cmd_uid, cli_addr,
                                 protocol_version);
    } else if (xstrncmp(cmd_ptr, "DRAIN:NODES:", 12) == 0) {
        lock_slurmctld(node_write_lock);
        resp = drain_nodes_user(cmd_ptr, cmd_uid, protocol_version);
        unlock_slurmctld(node_write_lock);
    } else if (xstrncmp(cmd_ptr, "DROP_NODE:JOBID:", 15) == 0) {
        lock_slurmctld(job_write_lock2);
        resp = drop_node(cmd_ptr, cmd_uid, protocol_version);
        unlock_slurmctld(job_write_lock2);
    } else if (xstrncmp(cmd_ptr, "GET_FAIL_NODES:JOBID:", 21) == 0) {
        lock_slurmctld(job_read_lock);
        resp = fail_nodes(cmd_ptr, cmd_uid, protocol_version);
        unlock_slurmctld(job_read_lock);
    } else if (xstrncmp(cmd_ptr, "REPLACE_NODE:JOBID:", 19) == 0) {
        lock_slurmctld(job_write_lock2);
        resp = replace_node(cmd_ptr, cmd_uid, protocol_version);
        unlock_slurmctld(job_write_lock2);
    } else if (xstrncmp(cmd_ptr, "SHOW_CONFIG", 11) == 0) {
        resp = show_config(cmd_ptr, cmd_uid, protocol_version);
    } else if (xstrncmp(cmd_ptr, "SHOW_JOB:JOBID:", 15) == 0) {
        resp = show_job(cmd_ptr, cmd_uid, protocol_version);
    } else if (xstrncmp(cmd_ptr, "TIME_INCR:JOBID:", 16) == 0) {
        lock_slurmctld(job_write_lock);
        resp = time_incr(cmd_ptr, cmd_uid, protocol_version);
        unlock_slurmctld(job_write_lock);
    } else {
        info("slurmctld/nonstop: Invalid command: %s", cmd_ptr);
        xstrfmtcat(resp, "%s ECMD", SLURM_VERSION_STRING);
    }

 send_resp:
    if (nonstop_debug > 0)
        info("slurmctld/nonstop: msg send:%s", resp);
    _send_reply(new_fd, resp);
    xfree(resp);
    if (msg_decrypted)
        free(msg_decrypted);
    return;
}

Author: artpol84, Project: slurm, Lines: 89


Example 24: slurm_conf_lock

/*****************************************************************************\
 * message handler thread
\*****************************************************************************/
static void *_msg_thread(void *no_data)
{
    slurm_fd_t sock_fd = -1, new_fd;
    slurm_addr_t cli_addr;
    char *msg;
    slurm_ctl_conf_t *conf;
    int i;
    /* Locks: Write configuration, job, node, and partition */
    slurmctld_lock_t config_write_lock = {
        WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK };

    conf = slurm_conf_lock();
    sched_port = conf->schedport;
    slurm_conf_unlock();

    /* Wait until configuration is completely loaded */
    lock_slurmctld(config_write_lock);
    unlock_slurmctld(config_write_lock);

    /* If SchedulerPort is already taken, keep trying to open it
     * once per minute. Slurmctld will continue to function
     * during this interval even if nothing can be scheduled. */
    for (i = 0; (!thread_shutdown); i++) {
        if (i > 0)
            sleep(60);
        sock_fd = slurm_init_msg_engine_port(sched_port);
        if (sock_fd != SLURM_SOCKET_ERROR)
            break;
        error("wiki: slurm_init_msg_engine_port %u %m",
              sched_port);
        error("wiki: Unable to communicate with Moab");
    }

    /* Process incoming RPCs until told to shutdown */
    while (!thread_shutdown) {
        if ((new_fd = slurm_accept_msg_conn(sock_fd, &cli_addr))
            == SLURM_SOCKET_ERROR) {
            if (errno != EINTR)
                error("wiki: slurm_accept_msg_conn %m");
            continue;
        }
        if (thread_shutdown) {
            close(new_fd);
            break;
        }
        /* It would be nice to create a pthread for each new
         * RPC, but that leaks memory on some systems when
         * done from a plugin.
         * FIXME: Maintain a pool of threads and reuse them. */
        err_code = 0;
        err_msg = "";
        msg = _recv_msg(new_fd);
        if (msg) {
            _proc_msg(new_fd, msg);
            xfree(msg);
        }
        slurm_close_accepted_conn(new_fd);
    }

    if (sock_fd > 0)
        (void) slurm_shutdown_msg_engine(sock_fd);
    pthread_exit((void *) 0);
    return NULL;
}

Author: IFCA, Project: slurm, Lines: 66



Note: the unlock_slurmctld function examples in this article were compiled from source code hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not reproduce without permission.

