Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • becker29/master-thesis-custom-ompi
  • felixkhals/swp-cm22-planbased-mpi
2 results
Select Git revision
  • bugfix/debug_print
  • bugfix/rank_setting
  • master
3 results
Show changes
Commits on Source (8)
......@@ -13,6 +13,8 @@
.hg
.hgignore_local
build_docker
*.la
*.lo
*.o
......
......@@ -32,35 +32,38 @@
#include "ompi_config.h"
#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <unistd.h>
#include <string.h>
#include <endian.h>
#include "opal/util/bit_ops.h"
#include "opal/util/info_subscriber.h"
#include "opal/util/string_copy.h"
#include "opal/mca/pmix/pmix-internal.h"
#include "ompi/attribute/attribute.h"
#include "ompi/communicator/communicator.h"
#include "ompi/constants.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/dpm/dpm.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/topo/base/base.h"
#include "ompi/runtime/params.h"
#include "ompi/communicator/communicator.h"
#include "ompi/attribute/attribute.h"
#include "ompi/dpm/dpm.h"
#include "ompi/memchecker.h"
#include "ompi/runtime/params.h"
#include "opal/mca/pmix/pmix-internal.h"
#include "opal/util/bit_ops.h"
#include "opal/util/info_subscriber.h"
#include "opal/util/string_copy.h"
#define FD_STDIN 0
#define BUFFLEN 128
#define BUFFLEN 128
#define JOBID_ENV_VAR "SLURM_VRM_JOBID"
#define PMIX_DYNAMIC_ID_ENV_VAR "DPM_PMIX_DYNAMIC_ID"
#define JOBID_ENV_VAR "SLURM_VRM_JOBID"
#define SLURM_JOBID_ENV_VAR "SLURM_JOB_ID"
#define PMIX_NAMESPACE_ENV_VAR "PMIX_NAMESPACE"
/*
** Table for Fortran <-> C communicator handle conversion
......@@ -71,31 +74,28 @@
opal_pointer_array_t ompi_mpi_communicators = {{0}};
opal_pointer_array_t ompi_comm_f_to_c_table = {{0}};
ompi_predefined_communicator_t ompi_mpi_comm_world = {{{{0}}}};
ompi_predefined_communicator_t ompi_mpi_comm_self = {{{{0}}}};
ompi_predefined_communicator_t ompi_mpi_comm_null = {{{{0}}}};
ompi_communicator_t *ompi_mpi_comm_parent = NULL;
ompi_predefined_communicator_t ompi_mpi_comm_world = {{{{0}}}};
ompi_predefined_communicator_t ompi_mpi_comm_self = {{{{0}}}};
ompi_predefined_communicator_t ompi_mpi_comm_null = {{{{0}}}};
ompi_communicator_t *ompi_mpi_comm_parent = NULL;
ompi_predefined_communicator_t *ompi_mpi_comm_world_addr =
&ompi_mpi_comm_world;
ompi_predefined_communicator_t *ompi_mpi_comm_self_addr =
&ompi_mpi_comm_self;
ompi_predefined_communicator_t *ompi_mpi_comm_null_addr =
&ompi_mpi_comm_null;
ompi_predefined_communicator_t *ompi_mpi_comm_world_addr = &ompi_mpi_comm_world;
ompi_predefined_communicator_t *ompi_mpi_comm_self_addr = &ompi_mpi_comm_self;
ompi_predefined_communicator_t *ompi_mpi_comm_null_addr = &ompi_mpi_comm_null;
static void ompi_comm_construct(ompi_communicator_t* comm);
static void ompi_comm_destruct(ompi_communicator_t* comm);
static void ompi_comm_construct(ompi_communicator_t *comm);
static void ompi_comm_destruct(ompi_communicator_t *comm);
OBJ_CLASS_INSTANCE(ompi_communicator_t, opal_infosubscriber_t,
ompi_comm_construct,
OBJ_CLASS_INSTANCE(ompi_communicator_t, opal_infosubscriber_t, ompi_comm_construct,
ompi_comm_destruct);
/* This is the counter for the number of communicators, which contain
process with more than one jobid. This counter is a usefull
shortcut for finalize and abort. */
int ompi_comm_num_dyncomm=0;
int ompi_comm_num_dyncomm = 0;
/*
 * Print a diagnostic for the current errno via perror() and terminate
 * the process with a non-zero exit status.  Used for unrecoverable
 * failures while talking to the node agent (missing env vars, socket
 * errors), where continuing MPI initialization makes no sense.
 *
 * @param msg  prefix string passed straight to perror(); never returns.
 */
static void errorExit(const char *msg)
{
    perror(msg);
    exit(1);
}
......@@ -104,17 +104,19 @@ static void errorExit(char* msg) {
* Connects to the node agent, sends its own process information
* and receives the list of modified ranks
*/
static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_vpid_t *modified_ranks) {
static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size,
opal_vpid_t *modified_ranks)
{
struct addrinfo hints;
struct addrinfo *result, *rp;
const char* host_ip = "localhost";
const char * agent_port = getenv("DPM_AGENT_PORT");
const char *host_ip = "localhost";
const char *agent_port = getenv("DPM_AGENT_PORT");
if (NULL == agent_port) {
errorExit("Could not find DPM_AGENT_PORT env");
}
memset(&hints, 0, sizeof(struct addrinfo));
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM; //opting for reliable sequenced socket type
hints.ai_socktype = SOCK_STREAM; // opting for reliable sequenced socket type
hints.ai_flags = 0;
hints.ai_protocol = IPPROTO_TCP;
......@@ -132,12 +134,12 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v
}
if (0 == connect(socket_fd, rp->ai_addr, rp->ai_addrlen)) {
break; //successfully connected
break; // successfully connected
} else {
close(socket_fd); //close socket when we cannot connect
close(socket_fd); // close socket when we cannot connect
}
}
freeaddrinfo(result); //free result
freeaddrinfo(result); // free result
// exit if the result-linked-list has been traversed until the end without a successful
// connection
if (NULL == rp) {
......@@ -146,7 +148,8 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v
pid_t pid = getpid();
const char * vrm_jobid = getenv(JOBID_ENV_VAR);
// Add VRM JobID
const char *vrm_jobid = getenv(JOBID_ENV_VAR);
char vrm_jobid_with_leading_comma[sizeof(uint64_t) + 1] = "";
if (NULL != vrm_jobid) {
char comma = ',';
......@@ -155,11 +158,39 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v
printf("TEST: %s", vrm_jobid_with_leading_comma);
}
// Add Slurm JobID
const char *slurm_jobid = getenv(SLURM_JOBID_ENV_VAR);
char slurm_jobid_str[sizeof(uint64_t) + 1] = "";
if (NULL != slurm_jobid) {
char comma = ',';
strncat(slurm_jobid_str, &comma, 1);
strcat(slurm_jobid_str, slurm_jobid);
printf("TEST JobID: %s", slurm_jobid_str);
}
// Dynamic Identifier
const char *pmix_id_offset = getenv(PMIX_DYNAMIC_ID_ENV_VAR);
char pmix_dynamic_id[128] = "";
char pmix_namespace[257] = ""; // PMIx max namespace len + comma
if (NULL != pmix_id_offset) {
size_t dynamic_id = 0;
sscanf(pmix_id_offset, "%zu", &dynamic_id);
dynamic_id += vpid;
sprintf(pmix_dynamic_id, ",%zu", dynamic_id);
// PMIx Namespace
const char *pmix_nspace_env = getenv(PMIX_NAMESPACE_ENV_VAR);
if (NULL != pmix_nspace_env) {
strncat(pmix_namespace, ",", 1);
strcat(pmix_namespace, pmix_nspace_env);
}
}
char info_to_send[BUFFLEN];
memset(info_to_send, 0, BUFFLEN);
snprintf(info_to_send, BUFFLEN,
"{\"msg_type\": 128, \"msg_data\": \"%d,%u,%u,%zu%s\"}",
pid, vpid, jobid, size, vrm_jobid_with_leading_comma);
snprintf(info_to_send, BUFFLEN, "{\"msg_type\": 128, \"msg_data\": \"%d,%u%s,%zu%s%s%s\"}", pid,
vpid, slurm_jobid_str, size, vrm_jobid_with_leading_comma, pmix_dynamic_id,
pmix_namespace);
uint32_t msg_length = strlen(info_to_send) + 1;
// Ensure that little endian is used for communinication (by convention with server)
......@@ -175,10 +206,11 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v
// drop (later overwrite) the length since it is not needed
recv(socket_fd, &le_answer_length, sizeof(le_answer_length), 0);
uint32_t answer_length = le32toh(le_answer_length);
char * rank_to_recv = calloc (answer_length, sizeof(char));
char *rank_to_recv = calloc(answer_length, sizeof(char));
// receive the actual message content
recv(socket_fd, rank_to_recv, answer_length, 0);
printf("Received from server: %s\n", rank_to_recv);
fflush(stdout);
// look for msg_data field in JSON string
const char msg_data_key[] = "msg_data";
......@@ -213,15 +245,15 @@ int ompi_comm_init(void)
/* Setup communicator array */
OBJ_CONSTRUCT(&ompi_mpi_communicators, opal_pointer_array_t);
if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_mpi_communicators, 16,
OMPI_FORTRAN_HANDLE_MAX, 64) ) {
if (OPAL_SUCCESS
!= opal_pointer_array_init(&ompi_mpi_communicators, 16, OMPI_FORTRAN_HANDLE_MAX, 64)) {
return OMPI_ERROR;
}
/* Setup f to c table (we can no longer use the cid as the fortran handle) */
OBJ_CONSTRUCT(&ompi_comm_f_to_c_table, opal_pointer_array_t);
if( OPAL_SUCCESS != opal_pointer_array_init(&ompi_comm_f_to_c_table, 8,
OMPI_FORTRAN_HANDLE_MAX, 32) ) {
if (OPAL_SUCCESS
!= opal_pointer_array_init(&ompi_comm_f_to_c_table, 8, OMPI_FORTRAN_HANDLE_MAX, 32)) {
return OMPI_ERROR;
}
......@@ -231,43 +263,44 @@ int ompi_comm_init(void)
group = OBJ_NEW(ompi_group_t);
size = ompi_process_info.num_procs;
group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *));
group->grp_proc_pointers = (ompi_proc_t **) calloc(size, sizeof(ompi_proc_t *));
group->grp_proc_count = size;
modified_ranks = (opal_vpid_t *) calloc (size, sizeof (opal_vpid_t));
modified_ranks = (opal_vpid_t *) calloc(size, sizeof(opal_vpid_t));
get_modified_ranks(OMPI_PROC_MY_NAME->jobid, OMPI_PROC_MY_NAME->vpid, size, modified_ranks);
for (size_t i = 0 ; i < size ; ++i) {
for (size_t i = 0; i < size; ++i) {
opal_vpid_t modified_rank = modified_ranks[i];
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
/* look for existing ompi_proc_t that matches this name */
group->grp_proc_pointers[modified_rank] = (ompi_proc_t *) ompi_proc_lookup (name);
group->grp_proc_pointers[modified_rank] = (ompi_proc_t *) ompi_proc_lookup(name);
if (NULL == group->grp_proc_pointers[modified_rank]) {
/* set sentinel value */
group->grp_proc_pointers[modified_rank] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
group->grp_proc_pointers[modified_rank] = (ompi_proc_t *) ompi_proc_name_to_sentinel(
name);
} else {
OBJ_RETAIN (group->grp_proc_pointers[modified_rank]);
OBJ_RETAIN(group->grp_proc_pointers[modified_rank]);
}
}
free(modified_ranks);
OMPI_GROUP_SET_INTRINSIC (group);
OMPI_GROUP_SET_DENSE (group);
OMPI_GROUP_SET_INTRINSIC(group);
OMPI_GROUP_SET_DENSE(group);
ompi_set_group_rank(group, ompi_proc_local());
ompi_mpi_comm_world.comm.c_contextid = 0;
ompi_mpi_comm_world.comm.c_contextid = 0;
ompi_mpi_comm_world.comm.c_id_start_index = 4;
ompi_mpi_comm_world.comm.c_id_available = 4;
ompi_mpi_comm_world.comm.c_my_rank = group->grp_my_rank;
ompi_mpi_comm_world.comm.c_local_group = group;
ompi_mpi_comm_world.comm.c_my_rank = group->grp_my_rank;
ompi_mpi_comm_world.comm.c_local_group = group;
ompi_mpi_comm_world.comm.c_remote_group = group;
OBJ_RETAIN(ompi_mpi_comm_world.comm.c_remote_group);
ompi_mpi_comm_world.comm.c_cube_dim = opal_cube_dim((int)size);
ompi_mpi_comm_world.comm.error_handler = ompi_initial_error_handler_eh;
OBJ_RETAIN( ompi_mpi_comm_world.comm.error_handler );
ompi_mpi_comm_world.comm.c_cube_dim = opal_cube_dim((int) size);
ompi_mpi_comm_world.comm.error_handler = ompi_initial_error_handler_eh;
OBJ_RETAIN(ompi_mpi_comm_world.comm.error_handler);
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_world.comm);
opal_pointer_array_set_item (&ompi_mpi_communicators, 0, &ompi_mpi_comm_world);
opal_pointer_array_set_item(&ompi_mpi_communicators, 0, &ompi_mpi_comm_world);
opal_string_copy(ompi_mpi_comm_world.comm.c_name, "MPI_COMM_WORLD",
sizeof(ompi_mpi_comm_world.comm.c_name));
......@@ -288,12 +321,12 @@ int ompi_comm_init(void)
ranks to use for aggregators
*/
opal_process_name_t wildcard = {OMPI_PROC_MY_NAME->jobid, OPAL_VPID_WILDCARD};
char *str=NULL;
char *str = NULL;
int rc;
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_MAPBY, &wildcard, &str, PMIX_STRING);
if ( 0 == rc && NULL != str) {
if ( strstr ( str, "BYNODE") ) {
if (0 == rc && NULL != str) {
if (strstr(str, "BYNODE")) {
OMPI_COMM_SET_MAPBY_NODE(&ompi_mpi_comm_world.comm);
}
if (NULL != str) {
......@@ -305,22 +338,22 @@ int ompi_comm_init(void)
assert(ompi_mpi_comm_self.comm.c_f_to_c_index == 1);
group = OBJ_NEW(ompi_group_t);
group->grp_proc_pointers = ompi_proc_self(&size);
group->grp_my_rank = 0;
group->grp_proc_count = (int)size;
OMPI_GROUP_SET_INTRINSIC (group);
OMPI_GROUP_SET_DENSE (group);
group->grp_my_rank = 0;
group->grp_proc_count = (int) size;
OMPI_GROUP_SET_INTRINSIC(group);
OMPI_GROUP_SET_DENSE(group);
ompi_mpi_comm_self.comm.c_contextid = 1;
ompi_mpi_comm_self.comm.c_contextid = 1;
ompi_mpi_comm_self.comm.c_id_start_index = 20;
ompi_mpi_comm_self.comm.c_id_available = 20;
ompi_mpi_comm_self.comm.c_my_rank = group->grp_my_rank;
ompi_mpi_comm_self.comm.c_local_group = group;
ompi_mpi_comm_self.comm.c_my_rank = group->grp_my_rank;
ompi_mpi_comm_self.comm.c_local_group = group;
ompi_mpi_comm_self.comm.c_remote_group = group;
OBJ_RETAIN(ompi_mpi_comm_self.comm.c_remote_group);
ompi_mpi_comm_self.comm.error_handler = ompi_initial_error_handler_eh;
OBJ_RETAIN( ompi_mpi_comm_self.comm.error_handler );
ompi_mpi_comm_self.comm.error_handler = ompi_initial_error_handler_eh;
OBJ_RETAIN(ompi_mpi_comm_self.comm.error_handler);
OMPI_COMM_SET_PML_ADDED(&ompi_mpi_comm_self.comm);
opal_pointer_array_set_item (&ompi_mpi_communicators, 1, &ompi_mpi_comm_self);
opal_pointer_array_set_item(&ompi_mpi_communicators, 1, &ompi_mpi_comm_self);
opal_string_copy(ompi_mpi_comm_self.comm.c_name, "MPI_COMM_SELF",
sizeof(ompi_mpi_comm_self.comm.c_name));
......@@ -335,19 +368,19 @@ int ompi_comm_init(void)
/* Setup MPI_COMM_NULL */
OBJ_CONSTRUCT(&ompi_mpi_comm_null, ompi_communicator_t);
assert(ompi_mpi_comm_null.comm.c_f_to_c_index == 2);
ompi_mpi_comm_null.comm.c_local_group = &ompi_mpi_group_null.group;
ompi_mpi_comm_null.comm.c_local_group = &ompi_mpi_group_null.group;
ompi_mpi_comm_null.comm.c_remote_group = &ompi_mpi_group_null.group;
OBJ_RETAIN(&ompi_mpi_group_null.group);
OBJ_RETAIN(&ompi_mpi_group_null.group);
ompi_mpi_comm_null.comm.c_contextid = 2;
ompi_mpi_comm_null.comm.c_my_rank = MPI_PROC_NULL;
ompi_mpi_comm_null.comm.c_contextid = 2;
ompi_mpi_comm_null.comm.c_my_rank = MPI_PROC_NULL;
/* unlike world, self, and parent, comm_null does not inherit the initial error
* handler */
ompi_mpi_comm_null.comm.error_handler = &ompi_mpi_errors_are_fatal.eh;
OBJ_RETAIN( ompi_mpi_comm_null.comm.error_handler );
opal_pointer_array_set_item (&ompi_mpi_communicators, 2, &ompi_mpi_comm_null);
ompi_mpi_comm_null.comm.error_handler = &ompi_mpi_errors_are_fatal.eh;
OBJ_RETAIN(ompi_mpi_comm_null.comm.error_handler);
opal_pointer_array_set_item(&ompi_mpi_communicators, 2, &ompi_mpi_comm_null);
opal_string_copy(ompi_mpi_comm_null.comm.c_name, "MPI_COMM_NULL",
sizeof(ompi_mpi_comm_null.comm.c_name));
......@@ -363,22 +396,21 @@ int ompi_comm_init(void)
* as comm_world (thus, the initial error handler). */
/* initialize communicator requests (for ompi_comm_idup) */
ompi_comm_request_init ();
ompi_comm_request_init();
return OMPI_SUCCESS;
}
ompi_communicator_t *ompi_comm_allocate ( int local_size, int remote_size )
ompi_communicator_t *ompi_comm_allocate(int local_size, int remote_size)
{
ompi_communicator_t *new_comm;
/* create new communicator element */
new_comm = OBJ_NEW(ompi_communicator_t);
new_comm->super.s_info = NULL;
new_comm->c_local_group = ompi_group_allocate ( local_size );
if ( 0 < remote_size ) {
new_comm->c_remote_group = ompi_group_allocate (remote_size);
new_comm->c_local_group = ompi_group_allocate(local_size);
if (0 < remote_size) {
new_comm->c_remote_group = ompi_group_allocate(remote_size);
new_comm->c_flags |= OMPI_COMM_INTER;
} else {
/*
......@@ -401,32 +433,39 @@ int ompi_comm_finalize(void)
ompi_communicator_t *comm;
/* Shut down MPI_COMM_SELF */
OBJ_DESTRUCT( &ompi_mpi_comm_self );
OBJ_DESTRUCT(&ompi_mpi_comm_self);
printf("FIN 7aa\n");
fflush(stdout);
/* disconnect all dynamic communicators */
ompi_dpm_dyn_finalize();
printf("FIN 7ab\n");
fflush(stdout);
/* Free the attributes on comm world. This is not done in the
* destructor as we delete attributes in ompi_comm_free (which
* is not called for comm world) */
if (NULL != ompi_mpi_comm_world.comm.c_keyhash) {
/* Ignore errors when deleting attributes on comm_world */
(void) ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_world.comm, ompi_mpi_comm_world.comm.c_keyhash);
(void) ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_world.comm,
ompi_mpi_comm_world.comm.c_keyhash);
OBJ_RELEASE(ompi_mpi_comm_world.comm.c_keyhash);
}
/* Shut down MPI_COMM_WORLD */
OBJ_DESTRUCT( &ompi_mpi_comm_world );
OBJ_DESTRUCT(&ompi_mpi_comm_world);
/* Shut down the parent communicator, if it exists */
if( ompi_mpi_comm_parent != &ompi_mpi_comm_null.comm ) {
if (ompi_mpi_comm_parent != &ompi_mpi_comm_null.comm) {
/* Note that we pass ompi_mpi_comm_parent here
(vs. &ompi_mpi_comm_parent) because it is of type
(ompi_communicator_t*), *NOT* (ompi_communicator_t). This
is because a parent communicator is created dynamically
during init, and we just set this pointer to it. Hence, we
just pass in the pointer here. */
OBJ_DESTRUCT (ompi_mpi_comm_parent);
OBJ_DESTRUCT(ompi_mpi_comm_parent);
/* Please note, that the we did increase the reference count
for ompi_mpi_comm_null, ompi_mpi_group_null, and
......@@ -448,19 +487,19 @@ int ompi_comm_finalize(void)
}
/* Shut down MPI_COMM_NULL */
OBJ_DESTRUCT( &ompi_mpi_comm_null );
OBJ_DESTRUCT(&ompi_mpi_comm_null);
/* Check whether we have some communicators left */
max = opal_pointer_array_get_size(&ompi_mpi_communicators);
for ( i=3; i<max; i++ ) {
comm = (ompi_communicator_t *)opal_pointer_array_get_item(&ompi_mpi_communicators, i);
if ( NULL != comm ) {
for (i = 3; i < max; i++) {
comm = (ompi_communicator_t *) opal_pointer_array_get_item(&ompi_mpi_communicators, i);
if (NULL != comm) {
/* Communicator has not been freed before finalize */
OBJ_RELEASE(comm);
comm=(ompi_communicator_t *)opal_pointer_array_get_item(&ompi_mpi_communicators, i);
if ( NULL != comm ) {
comm = (ompi_communicator_t *) opal_pointer_array_get_item(&ompi_mpi_communicators, i);
if (NULL != comm) {
/* Still here ? */
if ( !OMPI_COMM_IS_EXTRA_RETAIN(comm)) {
if (!OMPI_COMM_IS_EXTRA_RETAIN(comm)) {
/* For communicator that have been marked as "extra retain", we do not further
* enforce to decrease the reference counter once more. These "extra retain"
......@@ -471,9 +510,9 @@ int ompi_comm_finalize(void)
* parent communicator. Read the comment in comm_activate for
* a full explanation.
*/
if ( ompi_debug_show_handle_leaks && !(OMPI_COMM_IS_FREED(comm)) ){
opal_output(0,"WARNING: MPI_Comm still allocated in MPI_Finalize\n");
ompi_comm_dump ( comm);
if (ompi_debug_show_handle_leaks && !(OMPI_COMM_IS_FREED(comm))) {
opal_output(0, "WARNING: MPI_Comm still allocated in MPI_Finalize\n");
ompi_comm_dump(comm);
OBJ_RELEASE(comm);
}
}
......@@ -481,11 +520,17 @@ int ompi_comm_finalize(void)
}
}
OBJ_DESTRUCT (&ompi_mpi_communicators);
OBJ_DESTRUCT (&ompi_comm_f_to_c_table);
OBJ_DESTRUCT(&ompi_mpi_communicators);
OBJ_DESTRUCT(&ompi_comm_f_to_c_table);
printf("FIN 7ac\n");
fflush(stdout);
/* finalize communicator requests */
ompi_comm_request_fini ();
ompi_comm_request_fini();
printf("FIN 7ad\n");
fflush(stdout);
return OMPI_SUCCESS;
}
......@@ -495,45 +540,45 @@ int ompi_comm_finalize(void)
/********************************************************************************/
/* static functions */
/*
 * OBJ class constructor for ompi_communicator_t: register the new
 * communicator in the Fortran handle table and reset every field to a
 * safe "empty" default.  Real values (groups, cid, error handler, ...)
 * are filled in later by the specific creation path (comm_init,
 * comm_dup, connect/accept, ...).
 */
static void ompi_comm_construct(ompi_communicator_t *comm)
{
    /* Reserve a Fortran handle immediately; the cid can no longer double
       as the Fortran index, so the slot in the f-to-c table is the handle. */
    comm->c_f_to_c_index = opal_pointer_array_add(&ompi_comm_f_to_c_table, comm);
    comm->c_name[0] = '\0';
    comm->c_contextid = MPI_UNDEFINED;
    comm->c_id_available = MPI_UNDEFINED;
    comm->c_id_start_index = MPI_UNDEFINED;
    comm->c_flags = 0;
    comm->c_my_rank = 0;
    comm->c_cube_dim = 0;
    comm->c_local_group = NULL;
    comm->c_remote_group = NULL;
    comm->error_handler = NULL;
    comm->c_pml_comm = NULL;
    comm->c_topo = NULL;
    comm->c_coll = NULL;
    comm->c_nbc_tag = MCA_COLL_BASE_TAG_NONBLOCKING_BASE;

    /* A keyhash will be created if/when an attribute is cached on
       this communicator */
    comm->c_keyhash = NULL;

    comm->errhandler_type = OMPI_ERRHANDLER_TYPE_COMM;
#ifdef OMPI_WANT_PERUSE
    comm->c_peruse_handles = NULL;
#endif
    OBJ_CONSTRUCT(&comm->c_lock, opal_mutex_t);
#if OPAL_ENABLE_FT_MPI
    /* Fault-tolerance state starts "healthy": ANY_SOURCE allowed,
       nothing revoked, epoch 0, no agreement module selected yet. */
    comm->any_source_enabled = true;
    comm->any_source_offset = 0;
    comm->comm_revoked = false;
    comm->coll_revoked = false;
    comm->c_epoch = 0;
    comm->agreement_specific = NULL;
#endif
}
static void ompi_comm_destruct(ompi_communicator_t* comm)
static void ompi_comm_destruct(ompi_communicator_t *comm)
{
/* Note that the attributes were already released on this
communicator in ompi_comm_free() (i.e., from MPI_COMM_FREE /
......@@ -542,7 +587,7 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)
/* Release the collective module */
if ( NULL != comm->c_coll ) {
if (NULL != comm->c_coll) {
mca_coll_base_comm_unselect(comm);
}
......@@ -561,8 +606,8 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)
error, not cause a seg fault in pml_del_comm because it was
never pml_add_com'ed. */
if ( MPI_COMM_NULL != comm && OMPI_COMM_IS_PML_ADDED(comm) ) {
MCA_PML_CALL(del_comm (comm));
if (MPI_COMM_NULL != comm && OMPI_COMM_IS_PML_ADDED(comm)) {
MCA_PML_CALL(del_comm(comm));
}
/* Release topology module */
......@@ -572,64 +617,61 @@ static void ompi_comm_destruct(ompi_communicator_t* comm)
}
if (NULL != comm->c_local_group) {
OBJ_RELEASE ( comm->c_local_group );
OBJ_RELEASE(comm->c_local_group);
comm->c_local_group = NULL;
if ( OMPI_COMM_IS_INTRA(comm) ) {
if (OMPI_COMM_IS_INTRA(comm)) {
/* We have to decrement the ref count on the remote group
even if it is identical to the local one in case of
intra-comm */
OBJ_RELEASE ( comm->c_remote_group );
OBJ_RELEASE(comm->c_remote_group);
comm->c_remote_group = NULL;
}
}
if (NULL != comm->c_remote_group) {
OBJ_RELEASE ( comm->c_remote_group );
OBJ_RELEASE(comm->c_remote_group);
comm->c_remote_group = NULL;
}
if (NULL != comm->error_handler) {
OBJ_RELEASE ( comm->error_handler );
OBJ_RELEASE(comm->error_handler);
comm->error_handler = NULL;
}
#if OPAL_ENABLE_FT_MPI
if( NULL != comm->agreement_specific ) {
OBJ_RELEASE( comm->agreement_specific );
if (NULL != comm->agreement_specific) {
OBJ_RELEASE(comm->agreement_specific);
}
#endif /* OPAL_ENABLE_FT_MPI */
#endif /* OPAL_ENABLE_FT_MPI */
/* mark this cid as available */
if ( MPI_UNDEFINED != (int)comm->c_contextid &&
NULL != opal_pointer_array_get_item(&ompi_mpi_communicators,
comm->c_contextid)) {
opal_pointer_array_set_item ( &ompi_mpi_communicators,
comm->c_contextid, NULL);
if (MPI_UNDEFINED != (int) comm->c_contextid
&& NULL != opal_pointer_array_get_item(&ompi_mpi_communicators, comm->c_contextid)) {
opal_pointer_array_set_item(&ompi_mpi_communicators, comm->c_contextid, NULL);
}
/* reset the ompi_comm_f_to_c_table entry */
if ( MPI_UNDEFINED != comm->c_f_to_c_index &&
NULL != opal_pointer_array_get_item(&ompi_comm_f_to_c_table,
comm->c_f_to_c_index)) {
opal_pointer_array_set_item ( &ompi_comm_f_to_c_table,
comm->c_f_to_c_index, NULL);
if (MPI_UNDEFINED != comm->c_f_to_c_index
&& NULL != opal_pointer_array_get_item(&ompi_comm_f_to_c_table, comm->c_f_to_c_index)) {
opal_pointer_array_set_item(&ompi_comm_f_to_c_table, comm->c_f_to_c_index, NULL);
}
OBJ_DESTRUCT(&comm->c_lock);
}
/*
 * Generate an info-subscriber callback for one MPI_Info assertion key.
 * The callback sets or clears `flag` in comm->c_assertions based on the
 * boolean `value`, and returns the resulting state as "true"/"false"
 * (the string the info subsystem stores back for the key).
 */
#define OMPI_COMM_SET_INFO_FN(name, flag)                                                \
    static const char *ompi_comm_set_##name(opal_infosubscriber_t *obj, const char *key, \
                                            const char *value)                           \
    {                                                                                    \
        ompi_communicator_t *comm = (ompi_communicator_t *) obj;                         \
                                                                                         \
        if (opal_str_to_bool(value)) {                                                   \
            comm->c_assertions |= flag;                                                  \
        } else {                                                                         \
            comm->c_assertions &= ~flag;                                                 \
        }                                                                                \
                                                                                         \
        return OMPI_COMM_CHECK_ASSERT(comm, flag) ? "true" : "false";                    \
    }
OMPI_COMM_SET_INFO_FN(no_any_source, OMPI_COMM_ASSERT_NO_ANY_SOURCE)
......@@ -637,20 +679,24 @@ OMPI_COMM_SET_INFO_FN(no_any_tag, OMPI_COMM_ASSERT_NO_ANY_TAG)
OMPI_COMM_SET_INFO_FN(allow_overtake, OMPI_COMM_ASSERT_ALLOW_OVERTAKE)
OMPI_COMM_SET_INFO_FN(exact_length, OMPI_COMM_ASSERT_EXACT_LENGTH)
/*
 * Subscribe the info callback matching `assert_flag` on `comm`, so that
 * setting the corresponding "mpi_assert_*" info key toggles the flag in
 * comm->c_assertions.  All keys default to "false".
 *
 * @param comm         communicator whose info subscriptions are extended
 * @param assert_flag  one of the OMPI_COMM_ASSERT_* flags; unknown
 *                     values are silently ignored
 */
void ompi_comm_assert_subscribe(ompi_communicator_t *comm, int32_t assert_flag)
{
    switch (assert_flag) {
    case OMPI_COMM_ASSERT_NO_ANY_SOURCE:
        opal_infosubscribe_subscribe(&comm->super, "mpi_assert_no_any_source", "false",
                                     ompi_comm_set_no_any_source);
        break;
    case OMPI_COMM_ASSERT_NO_ANY_TAG:
        opal_infosubscribe_subscribe(&comm->super, "mpi_assert_no_any_tag", "false",
                                     ompi_comm_set_no_any_tag);
        break;
    case OMPI_COMM_ASSERT_ALLOW_OVERTAKE:
        opal_infosubscribe_subscribe(&comm->super, "mpi_assert_allow_overtaking", "false",
                                     ompi_comm_set_allow_overtake);
        break;
    case OMPI_COMM_ASSERT_EXACT_LENGTH:
        opal_infosubscribe_subscribe(&comm->super, "mpi_assert_exact_length", "false",
                                     ompi_comm_set_exact_length);
        break;
    default:
        /* Unknown assertion flags are ignored, matching prior behavior. */
        break;
    }
}
......@@ -31,44 +31,44 @@
#include "ompi_config.h"
#include "ompi/constants.h"
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#if HAVE_SYS_TIME_H
#include <sys/time.h>
# include <sys/time.h>
#endif
#include <fcntl.h>
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/util/alfg.h"
#include "opal/util/argv.h"
#include "opal/util/opal_getcwd.h"
#include "opal/util/opal_environ.h"
#include "opal/util/opal_getcwd.h"
#include "opal/util/path.h"
#include "opal/util/printf.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "opal/util/printf.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/pmix/base/base.h"
#include "ompi/communicator/communicator.h"
#include "ompi/group/group.h"
#include "ompi/proc/proc.h"
#include "ompi/info/info.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/proc/proc.h"
#include "ompi/runtime/ompi_rte.h"
#include "ompi/info/info.h"
#include "ompi/dpm/dpm.h"
static opal_rng_buff_t rnd;
/*
 * Per-communicator bookkeeping for MPI_Comm_disconnect: the communicator
 * being torn down, its size, the array of outstanding point-to-point
 * requests used for the disconnect handshake, and a scratch send/recv
 * buffer.  Collected into arrays and drained by disconnect_waitall().
 */
typedef struct {
    ompi_communicator_t *comm;   /* communicator being disconnected */
    int size;                    /* number of peers / requests */
    struct ompi_request_t **reqs; /* outstanding handshake requests */
    int buf;                     /* dummy payload for the handshake */
} ompi_dpm_disconnect_obj;
static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs);
static int disconnect_waitall(int count, ompi_dpm_disconnect_obj **objs);
static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm);
static int start_dvm(char **hostfiles, char **dash_host);
......@@ -76,9 +76,7 @@ typedef struct {
opal_list_item_t super;
ompi_proc_t *p;
} ompi_dpm_proct_caddy_t;
static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t,
opal_list_item_t,
NULL, NULL);
static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t, opal_list_item_t, NULL, NULL);
/*
* Init the module
......@@ -95,12 +93,11 @@ int ompi_dpm_init(void)
return OMPI_SUCCESS;
}
int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
const char *port_string, bool send_first,
ompi_communicator_t **newcomm)
int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, const char *port_string,
bool send_first, ompi_communicator_t **newcomm)
{
int k, size, rsize, rank, rc, rportlen=0;
char **members = NULL, *nstring, *rport=NULL, *key, *pkey;
int k, size, rsize, rank, rc, rportlen = 0;
char **members = NULL, *nstring, *rport = NULL, *key, *pkey;
bool dense, isnew;
opal_process_name_t pname;
opal_list_t ilist, mlist, rlist;
......@@ -112,10 +109,10 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
pmix_status_t pret;
opal_proclist_t *plt;
ompi_communicator_t *newcomp=MPI_COMM_NULL;
ompi_communicator_t *newcomp = MPI_COMM_NULL;
ompi_proc_t *proc;
ompi_group_t *group=comm->c_local_group;
ompi_proc_t **proc_list=NULL, **new_proc_list = NULL;
ompi_group_t *group = comm->c_local_group;
ompi_proc_t **proc_list = NULL, **new_proc_list = NULL;
int32_t i;
ompi_group_t *new_group_pointer;
ompi_dpm_proct_caddy_t *cd;
......@@ -123,8 +120,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
/* set default error return */
*newcomm = MPI_COMM_NULL;
size = ompi_comm_size ( comm );
rank = ompi_comm_rank ( comm );
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
printf("ACC 1\n");
fflush(stdout);
/* the "send_first" end will append ":connect" to the port name and publish
* the list of its participating procs on that key. The receiving root proc
......@@ -146,7 +146,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
char *value = strrchr(port_string, '=');
assert(NULL != value);
rportlen = atoi(++value);
if (rportlen > 0) rportlen *= -1;
if (rportlen > 0)
rportlen *= -1;
goto bcast_rportlen;
}
......@@ -159,7 +160,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
opal_argv_append_nosize(&members, nstring);
free(nstring);
/* add the number of procs in this job */
(void)opal_asprintf(&nstring, "%d", size);
(void) opal_asprintf(&nstring, "%d", size);
opal_argv_append_nosize(&members, nstring);
free(nstring);
} else {
......@@ -167,10 +168,9 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
proc_list = group->grp_proc_pointers;
dense = true;
} else {
proc_list = (ompi_proc_t**)calloc(group->grp_proc_count,
sizeof(ompi_proc_t *));
for (i=0 ; i<group->grp_proc_count ; i++) {
if (NULL == (proc_list[i] = ompi_group_peer_lookup(group,i))) {
proc_list = (ompi_proc_t **) calloc(group->grp_proc_count, sizeof(ompi_proc_t *));
for (i = 0; i < group->grp_proc_count; i++) {
if (NULL == (proc_list[i] = ompi_group_peer_lookup(group, i))) {
OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND);
rc = OMPI_ERR_NOT_FOUND;
free(proc_list);
......@@ -179,10 +179,10 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
}
dense = false;
}
for (i=0; i < size; i++) {
for (i = 0; i < size; i++) {
opal_process_name_t proc_name;
if (ompi_proc_is_sentinel (proc_list[i])) {
proc_name = ompi_proc_sentinel_to_name ((uintptr_t) proc_list[i]);
if (ompi_proc_is_sentinel(proc_list[i])) {
proc_name = ompi_proc_sentinel_to_name((uintptr_t) proc_list[i]);
} else {
proc_name = proc_list[i]->super.proc_name;
}
......@@ -196,15 +196,19 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
proc_list = NULL;
}
}
printf("ACC 2\n");
fflush(stdout);
if (rank == root) {
printf("ROOT: %d\n", root);
fflush(stdout);
/* the roots for each side exchange their list of participants */
if (send_first) {
(void)opal_asprintf(&key, "%s:connect", port_string);
(void)opal_asprintf(&pkey, "%s:accept", port_string);
(void) opal_asprintf(&key, "%s:connect", port_string);
(void) opal_asprintf(&pkey, "%s:accept", port_string);
} else {
(void)opal_asprintf(&key, "%s:accept", port_string);
(void)opal_asprintf(&pkey, "%s:connect", port_string);
(void) opal_asprintf(&key, "%s:accept", port_string);
(void) opal_asprintf(&pkey, "%s:connect", port_string);
}
nstring = opal_argv_join(members, ':');
PMIX_INFO_LOAD(&info, key, nstring, PMIX_STRING);
......@@ -213,7 +217,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
free(key);
free(pkey);
rc = opal_pmix_base_exchange(&info, &pdat, 600); // give them 10 minutes
rc = opal_pmix_base_exchange(&info, &pdat, 600); // give them 10 minutes
PMIX_INFO_DESTRUCT(&info);
if (OPAL_SUCCESS != rc) {
PMIX_PDATA_DESTRUCT(&pdat);
......@@ -221,10 +225,12 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
}
/* save the result */
rport = strdup(pdat.value.data.string); // need this later
rportlen = strlen(rport) + 1; // retain the NULL terminator
rport = strdup(pdat.value.data.string); // need this later
rportlen = strlen(rport) + 1; // retain the NULL terminator
PMIX_PDATA_DESTRUCT(&pdat);
}
printf("ACC 3\n");
fflush(stdout);
bcast_rportlen:
/* if we aren't in a comm_spawn, the non-root members won't have
......@@ -233,12 +239,14 @@ bcast_rportlen:
/* bcast the list-length to all processes in the local comm */
rc = comm->c_coll->coll_bcast(&rportlen, 1, MPI_INT, root, comm,
comm->c_coll->coll_bcast_module);
comm->c_coll->coll_bcast_module);
if (OMPI_SUCCESS != rc) {
free(rport);
goto exit;
}
printf("ACC 4\n");
fflush(stdout);
/* This is the comm_spawn error case: the root couldn't do the pmix spawn
* and is now propagating to the local group that this operation has to
* fail. */
......@@ -249,7 +257,7 @@ bcast_rportlen:
if (rank != root) {
/* non root processes need to allocate the buffer manually */
rport = (char*)malloc(rportlen);
rport = (char *) malloc(rportlen);
if (NULL == rport) {
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
......@@ -257,16 +265,18 @@ bcast_rportlen:
}
/* now share the list of remote participants */
rc = comm->c_coll->coll_bcast(rport, rportlen, MPI_BYTE, root, comm,
comm->c_coll->coll_bcast_module);
comm->c_coll->coll_bcast_module);
if (OMPI_SUCCESS != rc) {
free(rport);
goto exit;
}
printf("ACC 5\n");
fflush(stdout);
/* initiate a list of participants for the connect,
* starting with our own members */
OBJ_CONSTRUCT(&mlist, opal_list_t);
for (i=0; NULL != members[i]; i++) {
for (i = 0; NULL != members[i]; i++) {
OPAL_PMIX_CONVERT_STRING_TO_PROCT(&pxproc, members[i]);
plt = OBJ_NEW(opal_proclist_t);
memcpy(&plt->procid, &pxproc, sizeof(pmix_proc_t));
......@@ -290,7 +300,7 @@ bcast_rportlen:
OBJ_CONSTRUCT(&ilist, opal_list_t);
OBJ_CONSTRUCT(&rlist, opal_list_t);
for (i=0; NULL != members[i]; i++) {
for (i = 0; NULL != members[i]; i++) {
OPAL_PMIX_CONVERT_STRING_TO_PROCT(&pxproc, members[i]);
plt = OBJ_NEW(opal_proclist_t);
memcpy(&plt->procid, &pxproc, sizeof(pmix_proc_t));
......@@ -300,7 +310,7 @@ bcast_rportlen:
/* if the rank is wildcard, then we are including all ranks
* of that job, and the next entry in members should be the
* number of procs in the job */
if (NULL == members[i+1]) {
if (NULL == members[i + 1]) {
/* just protect against the error */
OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM);
opal_argv_free(members);
......@@ -310,9 +320,9 @@ bcast_rportlen:
rc = OMPI_ERR_BAD_PARAM;
goto exit;
}
rsize = strtoul(members[i+1], NULL, 10);
rsize = strtoul(members[i + 1], NULL, 10);
++i;
for (k=0; k < rsize; k++) {
for (k = 0; k < rsize; k++) {
pxproc.rank = k;
OPAL_PMIX_CONVERT_PROCT(rc, &pname, &pxproc);
if (OPAL_SUCCESS != rc) {
......@@ -359,24 +369,32 @@ bcast_rportlen:
}
}
opal_argv_free(members);
printf("ACC 6\n");
fflush(stdout);
/* convert the list of members to a pmix_proc_t array */
nprocs = opal_list_get_size(&mlist);
PMIX_PROC_CREATE(procs, nprocs);
n = 0;
OPAL_LIST_FOREACH(plt, &mlist, opal_proclist_t) {
OPAL_LIST_FOREACH (plt, &mlist, opal_proclist_t) {
memcpy(&procs[n], &plt->procid, sizeof(pmix_proc_t));
++n;
}
OPAL_LIST_DESTRUCT(&mlist);
printf("ACC 7\n");
fflush(stdout);
/* tell the host RTE to connect us - this will download
* all known data for the nspace's of participating procs
* so that add_procs will not result in a slew of lookups */
pret = PMIx_Connect(procs, nprocs, NULL, 0);
printf("ACC 8\n");
fflush(stdout);
PMIX_PROC_FREE(procs, nprocs);
rc = opal_pmix_convert_status(pret);
if (OPAL_SUCCESS != rc) {
printf("ACC 8 fail\n");
fflush(stdout);
OMPI_ERROR_LOG(rc);
OPAL_LIST_DESTRUCT(&ilist);
OPAL_LIST_DESTRUCT(&rlist);
......@@ -389,21 +407,21 @@ bcast_rportlen:
uint16_t u16;
opal_process_name_t wildcard_rank;
/* convert the list of new procs to a proc_t array */
new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist),
sizeof(ompi_proc_t *));
new_proc_list = (ompi_proc_t **) calloc(opal_list_get_size(&ilist), sizeof(ompi_proc_t *));
/* get the list of local peers for the new procs */
cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist);
cd = (ompi_dpm_proct_caddy_t *) opal_list_get_first(&ilist);
proc = cd->p;
wildcard_rank.jobid = proc->super.proc_name.jobid;
wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid;
/* retrieve the local peers */
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS,
&wildcard_rank, &val, PMIX_STRING);
printf("ACC 8 local peers\n");
fflush(stdout);
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS, &wildcard_rank, &val, PMIX_STRING);
if (OPAL_SUCCESS == rc && NULL != val) {
char **peers = opal_argv_split(val, ',');
free(val);
nprn = opal_argv_count(peers);
peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t));
peer_ranks = (uint32_t *) calloc(nprn, sizeof(uint32_t));
for (prn = 0; NULL != peers[prn]; prn++) {
peer_ranks[prn] = strtoul(peers[prn], NULL, 10);
}
......@@ -411,23 +429,28 @@ bcast_rportlen:
}
i = 0;
OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) {
printf("ACC 8 start foreach\n");
fflush(stdout);
OPAL_LIST_FOREACH (cd, &ilist, ompi_dpm_proct_caddy_t) {
proc = cd->p;
new_proc_list[i] = proc ;
new_proc_list[i] = proc;
/* ompi_proc_complete_init_single() initializes and optionally retrieves
* OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. since we can live without
* them, we are just fine */
printf("ACC 8 wait init single\n");
fflush(stdout);
ompi_proc_complete_init_single(proc);
/* if this proc is local, then get its locality */
if (NULL != peer_ranks) {
for (prn=0; prn < nprn; prn++) {
for (prn = 0; prn < nprn; prn++) {
if (peer_ranks[prn] == proc->super.proc_name.vpid) {
/* get their locality string */
val = NULL;
OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING,
&proc->super.proc_name, &val, PMIX_STRING);
&proc->super.proc_name, &val, PMIX_STRING);
if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) {
u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val);
u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality,
val);
free(val);
} else {
/* all we can say is that it shares our node */
......@@ -443,90 +466,113 @@ bcast_rportlen:
}
}
}
printf("ACC 8 done\n");
fflush(stdout);
++i;
}
if (NULL != peer_ranks) {
free(peer_ranks);
}
/* call add_procs on the new ones */
printf("ACC 8 add procs\n");
fflush(stdout);
rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist)));
printf("ACC 8 done add procs\n");
fflush(stdout);
free(new_proc_list);
new_proc_list = NULL;
if (OMPI_SUCCESS != rc) {
printf("ACC 8 error add procs\n");
fflush(stdout);
OMPI_ERROR_LOG(rc);
OPAL_LIST_DESTRUCT(&ilist);
goto exit;
}
}
OPAL_LIST_DESTRUCT(&ilist);
printf("ACC 9\n");
fflush(stdout);
/* now deal with the remote group */
rsize = opal_list_get_size(&rlist);
new_group_pointer=ompi_group_allocate(rsize);
new_group_pointer = ompi_group_allocate(rsize);
if (NULL == new_group_pointer) {
rc = OMPI_ERR_OUT_OF_RESOURCE;
OPAL_LIST_DESTRUCT(&rlist);
goto exit;
}
/* assign group elements */
i=0;
OPAL_LIST_FOREACH(cd, &rlist, ompi_dpm_proct_caddy_t) {
i = 0;
OPAL_LIST_FOREACH (cd, &rlist, ompi_dpm_proct_caddy_t) {
new_group_pointer->grp_proc_pointers[i++] = cd->p;
/* retain the proc */
OBJ_RETAIN(cd->p);
}
OPAL_LIST_DESTRUCT(&rlist);
printf("ACC 10\n");
fflush(stdout);
/* set up communicator structure */
rc = ompi_comm_set ( &newcomp, /* new comm */
comm, /* old comm */
group->grp_proc_count, /* local_size */
NULL, /* local_procs */
rsize, /* remote_size */
NULL , /* remote_procs */
NULL, /* attrs */
comm->error_handler, /* error handler */
NULL, /* topo component */
group, /* local group */
new_group_pointer /* remote group */
);
rc = ompi_comm_set(&newcomp, /* new comm */
comm, /* old comm */
group->grp_proc_count, /* local_size */
NULL, /* local_procs */
rsize, /* remote_size */
NULL, /* remote_procs */
NULL, /* attrs */
comm->error_handler, /* error handler */
NULL, /* topo component */
group, /* local group */
new_group_pointer /* remote group */
);
if (OMPI_SUCCESS != rc) {
goto exit;
}
printf("ACC 11\n");
fflush(stdout);
OBJ_RELEASE(new_group_pointer);
new_group_pointer = MPI_GROUP_NULL;
/* allocate comm_cid */
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
comm, /* old communicator */
NULL, /* bridge comm */
&root, /* local leader */
(void*)port_string, /* rendezvous point */
send_first, /* send or recv first */
OMPI_COMM_CID_INTRA_PMIX); /* mode */
rc = ompi_comm_nextcid(newcomp, /* new communicator */
comm, /* old communicator */
NULL, /* bridge comm */
&root, /* local leader */
(void *) port_string, /* rendezvous point */
send_first, /* send or recv first */
OMPI_COMM_CID_INTRA_PMIX); /* mode */
if (OMPI_SUCCESS != rc) {
goto exit;
}
printf("ACC 12\n");
fflush(stdout);
/* activate comm and init coll-component */
rc = ompi_comm_activate ( &newcomp, /* new communicator */
comm, /* old communicator */
NULL, /* bridge comm */
&root, /* local leader */
(void*)port_string, /* rendezvous point */
send_first, /* send or recv first */
OMPI_COMM_CID_INTRA_PMIX); /* mode */
rc = ompi_comm_activate(&newcomp, /* new communicator */
comm, /* old communicator */
NULL, /* bridge comm */
&root, /* local leader */
(void *) port_string, /* rendezvous point */
send_first, /* send or recv first */
OMPI_COMM_CID_INTRA_PMIX); /* mode */
if (OMPI_SUCCESS != rc) {
goto exit;
}
printf("ACC 13\n");
fflush(stdout);
/* Question: do we have to re-start some low level stuff
to enable the usage of fast communication devices
between the two worlds ?
*/
exit:
exit:
printf("ACC exit\n");
fflush(stdout);
if (OMPI_SUCCESS != rc) {
if (MPI_COMM_NULL != newcomp && NULL != newcomp) {
OBJ_RELEASE(newcomp);
......@@ -545,7 +591,7 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers)
ompi_proc_t *proct;
opal_process_name_t proc_name;
for (i=0; i < group->grp_proc_count; i++) {
for (i = 0; i < group->grp_proc_count; i++) {
if (OMPI_GROUP_IS_DENSE(group)) {
proct = group->grp_proc_pointers[i];
} else {
......@@ -555,8 +601,8 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers)
OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND);
return OMPI_ERR_NOT_FOUND;
}
if (ompi_proc_is_sentinel (proct)) {
proc_name = ompi_proc_sentinel_to_name ((uintptr_t)proct);
if (ompi_proc_is_sentinel(proct)) {
proc_name = ompi_proc_sentinel_to_name((uintptr_t) proct);
} else {
proc_name = proct->super.proc_name;
}
......@@ -566,7 +612,7 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers)
nm->name = proc_name;
/* need to maintain an ordered list to ensure the tracker signatures
* match across all procs */
OPAL_LIST_FOREACH(n2, peers, opal_namelist_t) {
OPAL_LIST_FOREACH (n2, peers, opal_namelist_t) {
if (opal_compare_proc(nm->name, n2->name) < 0) {
opal_list_insert_pos(peers, &n2->super, &nm->super);
nm = NULL;
......@@ -616,7 +662,7 @@ int ompi_dpm_disconnect(ompi_communicator_t *comm)
nprocs = opal_list_get_size(&coll);
PMIX_PROC_CREATE(procs, nprocs);
n = 0;
OPAL_LIST_FOREACH(nm, &coll, opal_namelist_t) {
OPAL_LIST_FOREACH (nm, &coll, opal_namelist_t) {
OPAL_PMIX_CONVERT_NAME(&procs[n], &nm->name);
++n;
}
......@@ -640,28 +686,24 @@ typedef struct {
char **conflicts;
} dpm_conflicts_t;
static dpm_conflicts_t mapby_modifiers[] = {
{.name = "oversubscribe", .conflicts = (char *[]){"nooversubscribe", NULL}},
{.name = "nooversubscribe", .conflicts = (char *[]){"oversubscribe", NULL}},
{.name = ""}
};
static dpm_conflicts_t mapby_modifiers[] = {{.name = "oversubscribe",
.conflicts = (char *[]){"nooversubscribe", NULL}},
{.name = "nooversubscribe",
.conflicts = (char *[]){"oversubscribe", NULL}},
{.name = ""}};
static dpm_conflicts_t rankby_modifiers[] = {
{.name = ""}
};
static dpm_conflicts_t rankby_modifiers[] = {{.name = ""}};
static dpm_conflicts_t bindto_modifiers[] = {
{.name = ""}
};
static dpm_conflicts_t bindto_modifiers[] = {{.name = ""}};
static int check_modifiers(const char *modifier, char **checks, dpm_conflicts_t *conflicts)
{
int n, m, k;
for (n=0; 0 != strlen(conflicts[n].name); n++) {
for (n = 0; 0 != strlen(conflicts[n].name); n++) {
if (0 == strcasecmp(conflicts[n].name, modifier)) {
for (m=0; NULL != checks[m]; m++) {
for (k=0; NULL != conflicts[n].conflicts[k]; k++) {
for (m = 0; NULL != checks[m]; m++) {
for (k = 0; NULL != conflicts[n].conflicts[k]; k++) {
if (0 == strcasecmp(checks[m], conflicts[n].conflicts[k])) {
return OMPI_ERR_BAD_PARAM;
}
......@@ -673,12 +715,8 @@ static int check_modifiers(const char *modifier, char **checks, dpm_conflicts_t
return OMPI_SUCCESS;
}
static int dpm_convert(opal_list_t *infos,
const char *infokey,
const char *option,
const char *directive,
const char *modifier,
bool deprecated)
static int dpm_convert(opal_list_t *infos, const char *infokey, const char *option,
const char *directive, const char *modifier, bool deprecated)
{
opal_info_item_t *iptr;
char *ck, *ptr, *help_str = NULL;
......@@ -695,23 +733,23 @@ static int dpm_convert(opal_list_t *infos,
modifiers = rankby_modifiers;
} else if (0 == strcmp(option, PMIX_BINDTO)) {
modifiers = bindto_modifiers;
} else {
} else {
return OMPI_ERR_BAD_PARAM;
}
}
/* does the matching option already exist? */
OPAL_LIST_FOREACH(iptr, infos, opal_info_item_t) {
OPAL_LIST_FOREACH (iptr, infos, opal_info_item_t) {
if (PMIX_CHECK_KEY(&iptr->info, option)) {
ck = strdup(iptr->info.value.data.string);
if (NULL != (ptr = strchr(ck, ':'))) {
*ptr = '\0';
++ptr;
}
/* were we given a directive? */
/* were we given a directive? */
if (NULL != directive) {
/* does it conflict? */
if (0 != strncasecmp(ck, directive, strlen(directive))) {
if (0 != strncasecmp(ck, directive, strlen(directive))) {
opal_asprintf(&help_str, "Conflicting directives \"%s %s\"", ck, directive);
#if PMIX_NUMERIC_VERSION >= 0x00040000
/* TODO: remove strdup if PMIx_Get_attribute_string takes const char* */
......@@ -721,8 +759,8 @@ static int dpm_convert(opal_list_t *infos,
#else
attr = option;
#endif
opal_show_help("help-dpm.txt", "deprecated-fail", true,
infokey, attr, help_str);
opal_show_help("help-dpm.txt", "deprecated-fail", true, infokey, attr,
help_str);
free(help_str);
free(ck);
return OMPI_ERR_BAD_PARAM;
......@@ -746,7 +784,8 @@ static int dpm_convert(opal_list_t *infos,
opal_argv_free(tmp);
if (OMPI_SUCCESS != rc) {
/* we have a conflict */
opal_asprintf(&ptr, " Option %s\n Conflicting modifiers \"%s %s\"", option, infokey, modifier);
opal_asprintf(&ptr, " Option %s\n Conflicting modifiers \"%s %s\"",
option, infokey, modifier);
#if PMIX_NUMERIC_VERSION >= 0x00040000
/* TODO: remove strdup if PMIx_Get_attribute_string takes const char* */
char *option_dup = strdup(option);
......@@ -755,8 +794,7 @@ static int dpm_convert(opal_list_t *infos,
#else
attr = option;
#endif
opal_show_help("help-dpm.txt", "deprecated-fail", true,
infokey, attr, ptr);
opal_show_help("help-dpm.txt", "deprecated-fail", true, infokey, attr, ptr);
free(ptr);
free(ck);
return OMPI_ERR_BAD_PARAM;
......@@ -766,8 +804,8 @@ static int dpm_convert(opal_list_t *infos,
free(iptr->info.value.data.string);
iptr->info.value.data.string = ptr;
free(ck);
opal_show_help("help-dpm.txt", "deprecated-converted", true,
infokey, iptr->info.value.data.string);
opal_show_help("help-dpm.txt", "deprecated-converted", true, infokey,
iptr->info.value.data.string);
return OMPI_SUCCESS;
}
}
......@@ -790,10 +828,9 @@ static int dpm_convert(opal_list_t *infos,
opal_list_append(infos, &iptr->super);
/* alert them */
if(deprecated) {
if (deprecated) {
opal_asprintf(&help_str, "Key: %s Value: %s", option, ptr);
opal_show_help("help-dpm.txt", "deprecated-converted", true,
infokey, help_str);
opal_show_help("help-dpm.txt", "deprecated-converted", true, infokey, help_str);
}
free(help_str);
free(ptr);
......@@ -801,16 +838,13 @@ static int dpm_convert(opal_list_t *infos,
return OMPI_SUCCESS;
}
int ompi_dpm_spawn(int count, const char *array_of_commands[],
char **array_of_argv[],
const int array_of_maxprocs[],
const MPI_Info array_of_info[],
int ompi_dpm_spawn(int count, const char *array_of_commands[], char **array_of_argv[],
const int array_of_maxprocs[], const MPI_Info array_of_info[],
const char *port_name)
{
int rc, i, j;
int have_wdir=0;
int flag=0;
int have_wdir = 0;
int flag = 0;
opal_cstring_t *info_str;
uint32_t ui32;
bool personality = false;
......@@ -885,9 +919,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
app->maxprocs = array_of_maxprocs[i];
/* copy over the argv array */
if (MPI_ARGVS_NULL != array_of_argv &&
MPI_ARGV_NULL != array_of_argv[i]) {
for (j=0; NULL != array_of_argv[i][j]; j++) {
if (MPI_ARGVS_NULL != array_of_argv && MPI_ARGV_NULL != array_of_argv[i]) {
for (j = 0; NULL != array_of_argv[i][j]; j++) {
opal_argv_append_nosize(&app->argv, array_of_argv[i][j]);
}
}
......@@ -904,21 +937,21 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* Check for well-known info keys */
have_wdir = 0;
if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) {
if (array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL) {
/* check for personality - this is a job-level key */
ompi_info_get (array_of_info[i], "personality", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "personality", &info_str, &flag);
if (flag) {
/* deprecate --> PMIX_PERSONALITY */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"personality", "PMIX_PERSONALITY");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "personality",
"PMIX_PERSONALITY");
personality = true;
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PERSONALITY, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
OBJ_RELEASE(info_str);
}
ompi_info_get (array_of_info[i], "PMIX_PERSONALITY", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_PERSONALITY", &info_str, &flag);
if (flag) {
personality = true;
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PERSONALITY, info_str->string, PMIX_STRING);
......@@ -927,8 +960,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_PERSONALITY");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
personality = true;
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PERSONALITY, info_str->string, PMIX_STRING);
......@@ -941,16 +974,16 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
* MPI standard ch. 10.3.4 */
/* check for 'host' */
ompi_info_get (array_of_info[i], "host", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "host", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_HOST, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
opal_argv_append_nosize(&dash_host, info_str->string);
OBJ_RELEASE(info_str);
}
ompi_info_get (array_of_info[i], "PMIX_HOST", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_HOST", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_HOST, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -959,8 +992,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_HOST");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_HOST, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -970,8 +1003,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#endif
/* check for 'wdir' */
ompi_info_get (array_of_info[i], "wdir", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "wdir", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_WDIR, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -979,8 +1012,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
have_wdir = 1;
}
if (!have_wdir) {
ompi_info_get (array_of_info[i], "PMIX_WDIR", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_WDIR", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_WDIR, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -991,8 +1024,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#if PMIX_NUMERIC_VERSION >= 0x00040000
if (!have_wdir) {
checkkey = PMIx_Get_attribute_string("PMIX_WDIR");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_WDIR, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -1003,8 +1036,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#endif
/* check for 'mpi_initial_errhandler' */
ompi_info_get (array_of_info[i], "mpi_initial_errhandler", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "mpi_initial_errhandler", &info_str, &flag);
if (flag) {
/* this is set as an environment because it must be available
* before pmix_init */
opal_setenv("OMPI_MCA_mpi_initial_errhandler", info_str->string, true, &app->env);
......@@ -1021,16 +1054,16 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
* deprecated in the non-prefixed form */
/* check for 'hostfile' */
ompi_info_get (array_of_info[i], "hostfile", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "hostfile", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_HOSTFILE, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
opal_argv_append_nosize(&hostfiles, info_str->string);
OBJ_RELEASE(info_str);
}
ompi_info_get (array_of_info[i], "PMIX_HOSTFILE", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_HOSTFILE", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_HOSTFILE, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -1039,8 +1072,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_HOSTFILE");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_HOSTFILE, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -1050,18 +1083,18 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#endif
/* check for 'add-hostfile' */
ompi_info_get (array_of_info[i], "add-hostfile", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "add-hostfile", &info_str, &flag);
if (flag) {
/* deprecate --> PMIX_ADD_HOSTFILE */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"add-hostfile", "PMIX_ADD_HOSTFILE");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "add-hostfile",
"PMIX_ADD_HOSTFILE");
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOSTFILE, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
OBJ_RELEASE(info_str);
}
ompi_info_get (array_of_info[i], "PMIX_ADD_HOSTFILE", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_ADD_HOSTFILE", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOSTFILE, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -1069,8 +1102,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_ADD_HOSTFILE");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOSTFILE, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -1079,18 +1112,18 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#endif
/* check for 'add-host' */
ompi_info_get (array_of_info[i], "add-host", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "add-host", &info_str, &flag);
if (flag) {
/* deprecate --> PMIX_ADD_HOST */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"add-host", "PMIX_ADD_HOST");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "add-host",
"PMIX_ADD_HOST");
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOST, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
OBJ_RELEASE(info_str);
}
ompi_info_get (array_of_info[i], "PMIX_ADD_HOST", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_ADD_HOST", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOST, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -1098,8 +1131,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_ADD_HOST");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOST, info_str->string, PMIX_STRING);
opal_list_append(&app_info, &info->super);
......@@ -1108,34 +1141,33 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#endif
/* check for env */
ompi_info_get (array_of_info[i], "env", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "env", &info_str, &flag);
if (flag) {
/* deprecate --> PMIX_ENVAR */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"env", "PMIX_ENVAR");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "env", "PMIX_ENVAR");
envars = opal_argv_split(info_str->string, '\n');
OBJ_RELEASE(info_str);
for (j=0; NULL != envars[j]; j++) {
for (j = 0; NULL != envars[j]; j++) {
opal_argv_append_nosize(&app->env, envars[j]);
}
opal_argv_free(envars);
}
ompi_info_get (array_of_info[i], "PMIX_ENVAR", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_ENVAR", &info_str, &flag);
if (flag) {
envars = opal_argv_split(info_str->string, '\n');
OBJ_RELEASE(info_str);
for (j=0; NULL != envars[j]; j++) {
for (j = 0; NULL != envars[j]; j++) {
opal_argv_append_nosize(&app->env, envars[j]);
}
opal_argv_free(envars);
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_ENVAR");
ompi_info_get (array_of_info[i], "PMIX_ENVAR", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_ENVAR", &info_str, &flag);
if (flag) {
envars = opal_argv_split(info_str->string, '\n');
OBJ_RELEASE(info_str);
for (j=0; NULL != envars[j]; j++) {
for (j = 0; NULL != envars[j]; j++) {
opal_argv_append_nosize(&app->env, envars[j]);
}
opal_argv_free(envars);
......@@ -1147,18 +1179,18 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
*
* This is a job-level key
*/
ompi_info_get (array_of_info[i], "ompi_prefix", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "ompi_prefix", &info_str, &flag);
if (flag) {
/* deprecate --> PMIX_PREFIX */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"ompi_prefix", "PMIX_PREFIX");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_prefix",
"PMIX_PREFIX");
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PREFIX, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
OBJ_RELEASE(info_str);
}
ompi_info_get (array_of_info[i], "PMIX_PREFIX", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_PREFIX", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PREFIX, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1166,8 +1198,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_PREFIX");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PREFIX, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1177,17 +1209,17 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* check for 'mapper' - a job-level key */
ompi_info_get(array_of_info[i], "mapper", &info_str, &flag);
if ( flag ) {
if (flag) {
/* deprecate --> PMIX_MAPPER */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"mapper", "PMIX_MAPPER");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "mapper",
"PMIX_MAPPER");
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_MAPPER, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
OBJ_RELEASE(info_str);
}
ompi_info_get(array_of_info[i], "PMIX_MAPPER", &info_str, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_MAPPER, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1196,7 +1228,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_MAPPER");
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_MAPPER, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1206,7 +1238,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* check for 'display_map' - a job-level key */
ompi_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag);
if ( flag ) {
if (flag) {
rc = dpm_convert(&job_info, "display_map", PMIX_MAPBY, NULL, "DISPLAYMAP", true);
if (OMPI_SUCCESS != rc) {
OPAL_LIST_DESTRUCT(&job_info);
......@@ -1224,8 +1256,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
/* check for 'npernode' and 'ppr' - job-level key */
ompi_info_get (array_of_info[i], "npernode", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "npernode", &info_str, &flag);
if (flag) {
opal_asprintf(&tmp, "PPR:%s:NODE", info_str->string);
rc = dpm_convert(&job_info, "npernode", PMIX_MAPBY, tmp, NULL, true);
free(tmp);
......@@ -1244,8 +1276,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
return MPI_ERR_SPAWN;
}
}
ompi_info_get (array_of_info[i], "pernode", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "pernode", &info_str, &flag);
if (flag) {
rc = dpm_convert(&job_info, "pernode", PMIX_MAPBY, "PPR:1:NODE", NULL, true);
OBJ_RELEASE(info_str);
if (OMPI_SUCCESS != rc) {
......@@ -1262,8 +1294,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
return MPI_ERR_SPAWN;
}
}
ompi_info_get (array_of_info[i], "ppr", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "ppr", &info_str, &flag);
if (flag) {
/* must have correct syntax with two colons */
if (NULL == (tmp = strchr(info_str->string, ':'))) {
opal_show_help("help-dpm.txt", "bad-ppr", true, info_str->string);
......@@ -1315,7 +1347,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* check for 'map_by' - job-level key */
ompi_info_get(array_of_info[i], "map_by", &info_str, &flag);
if ( flag ) {
if (flag) {
rc = dpm_convert(&job_info, "map_by", PMIX_MAPBY, info_str->string, NULL, false);
OBJ_RELEASE(info_str);
if (OMPI_SUCCESS != rc) {
......@@ -1333,7 +1365,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
}
ompi_info_get(array_of_info[i], "PMIX_MAPBY", &info_str, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_MAPBY, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1342,7 +1374,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_MAPBY");
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_MAPBY, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1352,7 +1384,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* check for 'rank_by' - job-level key */
ompi_info_get(array_of_info[i], "rank_by", &info_str, &flag);
if ( flag ) {
if (flag) {
rc = dpm_convert(&job_info, "rank_by", PMIX_RANKBY, info_str->string, NULL, false);
OBJ_RELEASE(info_str);
if (OMPI_SUCCESS != rc) {
......@@ -1364,7 +1396,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
}
ompi_info_get(array_of_info[i], "PMIX_RANKBY", &info_str, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_RANKBY, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1373,7 +1405,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_RANKBY");
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_RANKBY, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1383,7 +1415,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* check for 'bind_to' - job-level key */
ompi_info_get(array_of_info[i], "bind_to", &info_str, &flag);
if ( flag ) {
if (flag) {
rc = dpm_convert(&job_info, "bind_to", PMIX_BINDTO, info_str->string, NULL, false);
OBJ_RELEASE(info_str);
if (OMPI_SUCCESS != rc) {
......@@ -1395,7 +1427,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
}
ompi_info_get(array_of_info[i], "PMIX_BINDTO", &info_str, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_BINDTO, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1404,7 +1436,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_BINDTO");
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_BINDTO, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1414,16 +1446,16 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* check for 'preload_binary' - job-level key */
ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag);
if ( flag ) {
if (flag) {
/* deprecate --> PMIX_PRELOAD_BIN */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"ompi_preload_binary", "PMIX_PRELOAD_BIN");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_preload_binary",
"PMIX_PRELOAD_BIN");
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_BIN, &local_spawn, PMIX_BOOL);
opal_list_append(&job_info, &info->super);
}
ompi_info_get_bool(array_of_info[i], "PMIX_PRELOAD_BIN", &local_spawn, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_BIN, &local_spawn, PMIX_BOOL);
opal_list_append(&job_info, &info->super);
......@@ -1431,7 +1463,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_PRELOAD_BIN");
ompi_info_get_bool(array_of_info[i], checkkey, &local_spawn, &flag);
if ( flag ) {
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_BIN, &local_spawn, PMIX_BOOL);
opal_list_append(&job_info, &info->super);
......@@ -1439,18 +1471,18 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
#endif
/* check for 'preload_files' - job-level key */
ompi_info_get (array_of_info[i], "ompi_preload_files", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "ompi_preload_files", &info_str, &flag);
if (flag) {
/* deprecate --> PMIX_PRELOAD_FILES */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"ompi_preload_files", "PMIX_PRELOAD_FILES");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_preload_files",
"PMIX_PRELOAD_FILES");
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_FILES, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
OBJ_RELEASE(info_str);
}
ompi_info_get (array_of_info[i], "PMIX_PRELOAD_FILES", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_PRELOAD_FILES", &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_FILES, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1458,8 +1490,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_PRELOAD_FILES");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
info = OBJ_NEW(opal_info_item_t);
PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_FILES, info_str->string, PMIX_STRING);
opal_list_append(&job_info, &info->super);
......@@ -1472,15 +1504,15 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
*/
ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag);
if (flag && non_mpi) {
opal_show_help("help-dpm.txt", "deprecated-inform", true,
"ompi_non_mpi", "No longer relevant as RTE automatically detects this scenario");
opal_show_help("help-dpm.txt", "deprecated-inform", true, "ompi_non_mpi",
"No longer relevant as RTE automatically detects this scenario");
}
/* see if this is an MCA param that the user wants applied to the child job */
ompi_info_get (array_of_info[i], "ompi_param", &info_str, &flag);
if ( flag ) {
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"ompi_param", "PMIX_ENVAR");
ompi_info_get(array_of_info[i], "ompi_param", &info_str, &flag);
if (flag) {
opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_param",
"PMIX_ENVAR");
opal_argv_append_unique_nosize(&app->env, info_str->string, true);
OBJ_RELEASE(info_str);
}
......@@ -1488,11 +1520,11 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* see if user specified what to do with stdin - defaults to
* not forwarding stdin to child processes - job-level key
*/
ompi_info_get (array_of_info[i], "ompi_stdin_target", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "ompi_stdin_target", &info_str, &flag);
if (flag) {
/* deprecate --> PMIX_STDIN_TGT */
opal_show_help("help-dpm.txt", "deprecated-converted", true,
"ompi_stdin_target", "PMIX_STDIN_TGT");
opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_stdin_target",
"PMIX_STDIN_TGT");
if (0 == strcmp(info_str->string, "all")) {
ui32 = OPAL_VPID_WILDCARD;
} else if (0 == strcmp(info_str->string, "none")) {
......@@ -1505,8 +1537,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
opal_list_append(&job_info, &info->super);
OBJ_RELEASE(info_str);
}
ompi_info_get (array_of_info[i], "PMIX_STDIN_TGT", &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], "PMIX_STDIN_TGT", &info_str, &flag);
if (flag) {
if (0 == strcmp(info_str->string, "all")) {
ui32 = OPAL_VPID_WILDCARD;
} else if (0 == strcmp(info_str->string, "none")) {
......@@ -1521,8 +1553,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
}
#if PMIX_NUMERIC_VERSION >= 0x00040000
checkkey = PMIx_Get_attribute_string("PMIX_STDIN_TGT");
ompi_info_get (array_of_info[i], checkkey, &info_str, &flag);
if ( flag ) {
ompi_info_get(array_of_info[i], checkkey, &info_str, &flag);
if (flag) {
if (0 == strcmp(info_str->string, "all")) {
ui32 = OPAL_VPID_WILDCARD;
} else if (0 == strcmp(info_str->string, "none")) {
......@@ -1541,11 +1573,11 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
/* default value: If the user did not tell us where to look for the
* executable, we assume the current working directory
*/
if ( !have_wdir ) {
if (!have_wdir) {
char cwd[OPAL_PATH_MAX];
if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) {
OMPI_ERROR_LOG(rc);
PMIX_APP_FREE(apps, (size_t)count);
PMIX_APP_FREE(apps, (size_t) count);
opal_progress_event_users_decrement();
if (NULL != hostfiles) {
opal_argv_free(hostfiles);
......@@ -1569,7 +1601,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
PMIX_INFO_CREATE(app->info, ninfo);
app->ninfo = ninfo;
n = 0;
OPAL_LIST_FOREACH(info, &app_info, opal_info_item_t) {
OPAL_LIST_FOREACH (info, &app_info, opal_info_item_t) {
PMIX_INFO_XFER(&app->info[n], &info->info);
++n;
}
......@@ -1589,7 +1621,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[],
if (0 < ninfo) {
PMIX_INFO_CREATE(pinfo, ninfo);
n = 0;
OPAL_LIST_FOREACH(info, &job_info, opal_info_item_t) {
OPAL_LIST_FOREACH (info, &job_info, opal_info_item_t) {
PMIX_INFO_XFER(&pinfo[n], &info->info);
++n;
}
......@@ -1643,7 +1675,7 @@ int ompi_dpm_open_port(char *port_name)
r = opal_rand(&rnd);
opal_convert_process_name_to_string(&tmp, OMPI_PROC_MY_NAME);
snprintf(port_name, MPI_MAX_PORT_NAME-1, "%s:%u", tmp, r);
snprintf(port_name, MPI_MAX_PORT_NAME - 1, "%s:%u", tmp, r);
port_name[MPI_MAX_PORT_NAME - 1] = '\0';
free(tmp);
return OMPI_SUCCESS;
......@@ -1657,10 +1689,10 @@ int ompi_dpm_close_port(const char *port_name)
int ompi_dpm_dyn_init(void)
{
int root=0, rc;
int root = 0, rc;
bool send_first = true;
ompi_communicator_t *newcomm=NULL;
char *port_name=NULL, *tmp, *ptr;
ompi_communicator_t *newcomm = NULL;
char *port_name = NULL, *tmp, *ptr;
/* check for appropriate env variable */
tmp = getenv("OMPI_PARENT_PORT");
......@@ -1671,11 +1703,11 @@ int ompi_dpm_dyn_init(void)
/* the value passed to us may have quote marks around it to protect
* the value if passed on the command line. We must remove those
* to have a correct string
* to have a correct string
*/
if ('"' == tmp[0]) {
if ('"' == tmp[0]) {
/* if the first char is a quote, then so will the last one be */
tmp[strlen(tmp)-1] = '\0';
tmp[strlen(tmp) - 1] = '\0';
ptr = &tmp[1];
} else {
ptr = &tmp[0];
......@@ -1703,10 +1735,13 @@ int ompi_dpm_dyn_init(void)
snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMM_PARENT");
newcomm->c_flags |= OMPI_COMM_NAMEISSET;
FILE *dbg_out = fopen("/home/ompi_debug", "a");
fprintf(dbg_out, "OMPI_DPM INIT COMPLETE\n");
fclose(dbg_out);
return OMPI_SUCCESS;
}
/*
* finalize the module
*/
......@@ -1717,7 +1752,7 @@ int ompi_dpm_finalize(void)
static void cleanup_dpm_disconnect_objs(ompi_dpm_disconnect_obj **objs, int count)
{
for(int i = 0; i < count; i++) {
for (int i = 0; i < count; i++) {
if (NULL != objs[i]->reqs) {
free(objs[i]->reqs);
}
......@@ -1733,21 +1768,24 @@ static void cleanup_dpm_disconnect_objs(ompi_dpm_disconnect_obj **objs, int coun
and does the disconnect for all dynamic communicators */
int ompi_dpm_dyn_finalize(void)
{
int i,j=0, max=0;
ompi_dpm_disconnect_obj **objs=NULL;
ompi_communicator_t *comm=NULL;
int i, j = 0, max = 0;
ompi_dpm_disconnect_obj **objs = NULL;
ompi_communicator_t *comm = NULL;
if (1 < ompi_comm_num_dyncomm) {
objs = (ompi_dpm_disconnect_obj **) malloc(ompi_comm_num_dyncomm
* sizeof(ompi_dpm_disconnect_obj *));
if (1 <ompi_comm_num_dyncomm) {
objs = (ompi_dpm_disconnect_obj**)malloc(ompi_comm_num_dyncomm *
sizeof(ompi_dpm_disconnect_obj*));
printf("dc start\n");
fflush(stdout);
if (NULL == objs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
max = opal_pointer_array_get_size(&ompi_mpi_communicators);
for (i=3; i<max; i++) {
comm = (ompi_communicator_t*)opal_pointer_array_get_item(&ompi_mpi_communicators,i);
if (NULL != comm && OMPI_COMM_IS_DYNAMIC(comm)) {
for (i = 3; i < max; i++) {
comm = (ompi_communicator_t *) opal_pointer_array_get_item(&ompi_mpi_communicators, i);
if (NULL != comm && OMPI_COMM_IS_DYNAMIC(comm)) {
objs[j++] = disconnect_init(comm);
}
}
......@@ -1757,8 +1795,14 @@ int ompi_dpm_dyn_finalize(void)
return OMPI_ERROR;
}
printf("waitall\n");
fflush(stdout);
disconnect_waitall(ompi_comm_num_dyncomm, objs);
printf("dc done 1\n");
fflush(stdout);
}
printf("dc done all\n");
fflush(stdout);
return OMPI_SUCCESS;
}
......@@ -1779,11 +1823,11 @@ The communicators can than be released.
static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm)
{
ompi_dpm_disconnect_obj *obj=NULL;
ompi_dpm_disconnect_obj *obj = NULL;
int ret;
int i;
obj = (ompi_dpm_disconnect_obj*)calloc(1,sizeof(ompi_dpm_disconnect_obj));
obj = (ompi_dpm_disconnect_obj *) calloc(1, sizeof(ompi_dpm_disconnect_obj));
if (NULL == obj) {
opal_output(0, "Could not allocate disconnect object");
return NULL;
......@@ -1796,7 +1840,7 @@ static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm)
}
obj->comm = comm;
obj->reqs = (ompi_request_t**)malloc(2*obj->size*sizeof(ompi_request_t *));
obj->reqs = (ompi_request_t **) malloc(2 * obj->size * sizeof(ompi_request_t *));
if (NULL == obj->reqs) {
opal_output(0, "Could not allocate request array for disconnect object");
free(obj);
......@@ -1805,10 +1849,9 @@ static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm)
/* initiate all isend_irecvs. We use a dummy buffer stored on
the object, since we are sending zero size messages anyway. */
for (i=0; i < obj->size; i++) {
ret = MCA_PML_CALL(irecv(&(obj->buf), 0, MPI_INT, i,
OMPI_COMM_BARRIER_TAG, comm,
&(obj->reqs[2*i])));
for (i = 0; i < obj->size; i++) {
ret = MCA_PML_CALL(
irecv(&(obj->buf), 0, MPI_INT, i, OMPI_COMM_BARRIER_TAG, comm, &(obj->reqs[2 * i])));
if (OMPI_SUCCESS != ret) {
opal_output(0, "dpm_disconnect_init: error %d in irecv to process %d", ret, i);
......@@ -1816,10 +1859,8 @@ static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm)
free(obj);
return NULL;
}
ret = MCA_PML_CALL(isend(&(obj->buf), 0, MPI_INT, i,
OMPI_COMM_BARRIER_TAG,
MCA_PML_BASE_SEND_SYNCHRONOUS,
comm, &(obj->reqs[2*i+1])));
ret = MCA_PML_CALL(isend(&(obj->buf), 0, MPI_INT, i, OMPI_COMM_BARRIER_TAG,
MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &(obj->reqs[2 * i + 1])));
if (OMPI_SUCCESS != ret) {
opal_output(0, "dpm_disconnect_init: error %d in isend to process %d", ret, i);
......@@ -1841,16 +1882,19 @@ static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm)
* - call waitall on the overall request array
* - free the objects
*/
static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
static int disconnect_waitall(int count, ompi_dpm_disconnect_obj **objs)
{
ompi_request_t **reqs=NULL;
char *treq=NULL;
printf("RUN waitall\n");
fflush(stdout);
ompi_request_t **reqs = NULL;
char *treq = NULL;
int totalcount = 0;
int i;
int ret;
for (i=0; i<count; i++) {
for (i = 0; i < count; i++) {
if (NULL == objs[i]) {
opal_output(0, "Error in comm_disconnect_waitall");
return OMPI_ERROR;
......@@ -1859,21 +1903,28 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
totalcount += objs[i]->size;
}
reqs = (ompi_request_t**)malloc(2*totalcount*sizeof(ompi_request_t *));
reqs = (ompi_request_t **) malloc(2 * totalcount * sizeof(ompi_request_t *));
if (NULL == reqs) {
opal_output(0, "ompi_comm_disconnect_waitall: error allocating memory");
return OMPI_ERROR;
}
/* generate a single, large array of pending requests */
treq = (char *)reqs;
for (i=0; i<count; i++) {
memcpy(treq, objs[i]->reqs, 2*objs[i]->size * sizeof(ompi_request_t *));
treq += 2*objs[i]->size * sizeof(ompi_request_t *);
treq = (char *) reqs;
for (i = 0; i < count; i++) {
memcpy(treq, objs[i]->reqs, 2 * objs[i]->size * sizeof(ompi_request_t *));
treq += 2 * objs[i]->size * sizeof(ompi_request_t *);
}
/* force all non-blocking all-to-alls to finish */
ret = ompi_request_wait_all(2*totalcount, reqs, MPI_STATUSES_IGNORE);
printf("waitall request\n");
fflush(stdout);
for (i = 0; i < 2 * totalcount; i++) {
printf("Request %d\n", (int) reqs[i]->req_type);
fflush(stdout);
}
ret = ompi_request_wait_all(2 * totalcount, reqs, MPI_STATUSES_IGNORE);
/* Finally, free everything */
cleanup_dpm_disconnect_objs(objs, count);
......@@ -1885,12 +1936,12 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs)
/**********************************************************************/
/**********************************************************************/
/**********************************************************************/
static bool ompi_dpm_group_is_dyn (ompi_group_t *group, ompi_jobid_t thisjobid)
static bool ompi_dpm_group_is_dyn(ompi_group_t *group, ompi_jobid_t thisjobid)
{
int size = group ? ompi_group_size (group) : 0;
int size = group ? ompi_group_size(group) : 0;
for (int i = 0 ; i < size ; ++i) {
opal_process_name_t name = ompi_group_get_proc_name (group, i);
for (int i = 0; i < size; ++i) {
opal_process_name_t name = ompi_group_get_proc_name(group, i);
if (thisjobid != ((ompi_process_name_t *) &name)->jobid) {
/* at least one is different */
......@@ -1916,17 +1967,17 @@ void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm)
return;
}
thisjobid = ompi_group_get_proc_name (comm->c_local_group, 0).jobid;
thisjobid = ompi_group_get_proc_name(comm->c_local_group, 0).jobid;
/* loop over all processes in local group and check for
* a different jobid
*/
found = ompi_dpm_group_is_dyn (comm->c_local_group, thisjobid);
found = ompi_dpm_group_is_dyn(comm->c_local_group, thisjobid);
if (!found) {
/* if inter-comm, loop over all processes in remote_group
* and see if any are different from thisjobid
*/
found = ompi_dpm_group_is_dyn (comm->c_remote_group, thisjobid);
found = ompi_dpm_group_is_dyn(comm->c_remote_group, thisjobid);
}
/* if a different jobid was found, set the disconnect flag*/
......@@ -1938,7 +1989,7 @@ void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm)
#if OMPI_HAVE_PRRTE
#define DVM_URI_MSG_LGTH 256
# define DVM_URI_MSG_LGTH 256
static void set_handler_default(int sig)
{
......@@ -1948,7 +1999,7 @@ static void set_handler_default(int sig)
act.sa_flags = 0;
sigemptyset(&act.sa_mask);
sigaction(sig, &act, (struct sigaction *)0);
sigaction(sig, &act, (struct sigaction *) 0);
}
static int start_dvm(char **hostfiles, char **dash_host)
......@@ -1996,7 +2047,7 @@ static int start_dvm(char **hostfiles, char **dash_host)
return OMPI_ERROR;
}
/* we need to start the PRRTE DVM first so we can
/* we need to start the PRRTE DVM first so we can
* spawn processes - see if they gave us any hostfile
* or dash-host options we should pass along */
opal_argv_append_nosize(&args, "prte");
......@@ -2072,25 +2123,23 @@ static int start_dvm(char **hostfiles, char **dash_host)
execv(cmd, args);
/* if I get here, the execv failed! */
opal_show_help("help-ess-base.txt", "ess-base:execv-error",
true, cmd, strerror(errno));
opal_show_help("help-ess-base.txt", "ess-base:execv-error", true, cmd, strerror(errno));
exit(1);
}
free(cmd);
/* I am the parent - wait to hear something back and
* report results
*/
close(p[1]); /* parent closes the write - prte will write its contact info to it*/
close(death_pipe[0]); /* parent closes the death_pipe's read */
close(p[1]); /* parent closes the write - prte will write its contact info to it*/
close(death_pipe[0]); /* parent closes the death_pipe's read */
opal_argv_free(args);
/* setup the buffer to read the DVM's uri */
buffer_length = DVM_URI_MSG_LGTH;
chunk = DVM_URI_MSG_LGTH-1;
chunk = DVM_URI_MSG_LGTH - 1;
num_chars_read = 0;
uri = (char*)malloc(buffer_length);
uri = (char *) malloc(buffer_length);
memset(uri, 0, buffer_length);
while (0 != (rc = read(p[0], &uri[num_chars_read], chunk))) {
......@@ -2105,7 +2154,7 @@ static int start_dvm(char **hostfiles, char **dash_host)
chunk -= rc;
if (0 == chunk) {
chunk = DVM_URI_MSG_LGTH;
uri = realloc((void*)uri, buffer_length+chunk);
uri = realloc((void *) uri, buffer_length + chunk);
memset(&uri[buffer_length], 0, chunk);
buffer_length += chunk;
}
......
......@@ -27,86 +27,84 @@
#include "ompi_config.h"
#include <stdio.h>
#include "opal/util/show_help.h"
#include "opal/util/printf.h"
#include "opal/util/show_help.h"
#include "ompi/info/info.h"
#include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/params.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/communicator/communicator.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/dpm/dpm.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/info/info.h"
#include "ompi/memchecker.h"
#include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/params.h"
#if OMPI_BUILD_MPI_PROFILING
#if OPAL_HAVE_WEAK_SYMBOLS
#pragma weak MPI_Comm_spawn = PMPI_Comm_spawn
#endif
#define MPI_Comm_spawn PMPI_Comm_spawn
# if OPAL_HAVE_WEAK_SYMBOLS
# pragma weak MPI_Comm_spawn = PMPI_Comm_spawn
# endif
# define MPI_Comm_spawn PMPI_Comm_spawn
#endif
static const char FUNC_NAME[] = "MPI_Comm_spawn";
int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info,
int root, MPI_Comm comm, MPI_Comm *intercomm,
int array_of_errcodes[])
int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info, int root,
MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[])
{
int rank, rc=OMPI_SUCCESS, i, flag;
printf("MPI A\n");
fflush(stdout);
int rank, rc = OMPI_SUCCESS, i, flag;
bool send_first = false; /* we wait to be contacted */
ompi_communicator_t *newcomp=MPI_COMM_NULL;
char port_name[MPI_MAX_PORT_NAME]; char *port_string = NULL;
ompi_communicator_t *newcomp = MPI_COMM_NULL;
char port_name[MPI_MAX_PORT_NAME];
char *port_string = NULL;
bool non_mpi = false;
MEMCHECKER(
memchecker_comm(comm);
);
MEMCHECKER(memchecker_comm(comm););
printf("MPI B\n");
fflush(stdout);
if ( MPI_PARAM_CHECK ) {
if (MPI_PARAM_CHECK) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
if ( ompi_comm_invalid (comm)) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM,
FUNC_NAME);
if (ompi_comm_invalid(comm)) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, FUNC_NAME);
}
if ( OMPI_COMM_IS_INTER(comm)) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM,
FUNC_NAME);
if (OMPI_COMM_IS_INTER(comm)) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, FUNC_NAME);
}
if ( (0 > root) || (ompi_comm_size(comm) <= root) ) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG,
FUNC_NAME);
if ((0 > root) || (ompi_comm_size(comm) <= root)) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
if ( NULL == intercomm ) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG,
FUNC_NAME);
if (NULL == intercomm) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
}
#if OPAL_ENABLE_FT_MPI
if( OPAL_UNLIKELY(!ompi_comm_iface_coll_check(comm, &rc)) ) {
if (OPAL_UNLIKELY(!ompi_comm_iface_coll_check(comm, &rc))) {
return OMPI_ERRHANDLER_INVOKE(comm, rc, FUNC_NAME);
}
#endif
rank = ompi_comm_rank ( comm );
if ( MPI_PARAM_CHECK ) {
if ( rank == root ) {
if ( NULL == command ) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG,
FUNC_NAME);
printf("MPI C\n");
fflush(stdout);
rank = ompi_comm_rank(comm);
if (MPI_PARAM_CHECK) {
if (rank == root) {
if (NULL == command) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
if ( 0 > maxprocs ) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG,
FUNC_NAME);
if (0 > maxprocs) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
if (NULL == info || ompi_info_is_freed(info)) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO,
FUNC_NAME);
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, FUNC_NAME);
}
}
}
printf("MPI D\n");
fflush(stdout);
if (!ompi_mpi_dynamics_is_enabled(FUNC_NAME)) {
return OMPI_ERRHANDLER_INVOKE(comm, OMPI_ERR_NOT_SUPPORTED, FUNC_NAME);
......@@ -114,61 +112,80 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf
/* initialize the port name to avoid problems */
memset(port_name, 0, MPI_MAX_PORT_NAME);
printf("MPI E\n");
fflush(stdout);
/* See if the info key "ompi_non_mpi" was set to true */
if (rank == root) {
ompi_info_get_bool(info, "ompi_non_mpi", &non_mpi, &flag);
}
if ( rank == root ) {
if (rank == root) {
if (!non_mpi) {
/* Open a port. The port_name is passed as an environment
variable to the children. */
if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) {
printf("NON_MPI\n");
fflush(stdout);
if (OMPI_SUCCESS != (rc = ompi_dpm_open_port(port_name))) {
goto error;
}
} else if (1 < ompi_comm_size(comm)) {
printf("OMPI_COMM_SIZE\n");
fflush(stdout);
/* we do not support non_mpi spawns on comms this size */
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
}
if (OMPI_SUCCESS != (rc = ompi_dpm_spawn (1, &command, &argv, &maxprocs,
&info, port_name))) {
printf("SPAWN\n");
fflush(stdout);
if (OMPI_SUCCESS
!= (rc = ompi_dpm_spawn(1, &command, &argv, &maxprocs, &info, port_name))) {
goto error;
}
}
printf("MPI F\n");
fflush(stdout);
error:
printf("MPI G\n");
fflush(stdout);
if (OMPI_SUCCESS != rc) {
printf("NO SUCCESS\n");
fflush(stdout);
/* There was an error in one of the above stages,
* we still need to do the connect_accept stage so that
* non-root ranks do not deadlock.
* Add the error code to the port string for connect_accept
* to propagate the error code. */
(void)opal_asprintf(&port_string, "%s:error=%d", port_name, rc);
}
else {
(void) opal_asprintf(&port_string, "%s:error=%d", port_name, rc);
} else {
port_string = port_name;
}
printf("MPI H\n");
fflush(stdout);
if (non_mpi) {
newcomp = MPI_COMM_NULL;
} else {
rc = ompi_dpm_connect_accept (comm, root, port_string, send_first, &newcomp);
rc = ompi_dpm_connect_accept(comm, root, port_string, send_first, &newcomp);
}
if (OPAL_ERR_NOT_SUPPORTED == rc) {
opal_show_help("help-mpi-api.txt",
"MPI function not supported",
true,
FUNC_NAME,
printf("NOT SUPPORTED\n");
opal_show_help("help-mpi-api.txt", "MPI function not supported", true, FUNC_NAME,
"Underlying runtime environment does not support spawn functionality");
fflush(stdout);
}
if(port_string != port_name) {
if (port_string != port_name) {
free(port_string);
}
printf("MPI I\n");
fflush(stdout);
/* close the port */
if (rank == root && !non_mpi) {
ompi_dpm_close_port(port_name);
......@@ -176,11 +193,14 @@ error:
/* set error codes */
if (MPI_ERRCODES_IGNORE != array_of_errcodes) {
for ( i=0; i < maxprocs; i++ ) {
array_of_errcodes[i]=rc;
for (i = 0; i < maxprocs; i++) {
array_of_errcodes[i] = rc;
}
}
printf("MPI J\n", rc);
fflush(stdout);
*intercomm = newcomp;
OMPI_ERRHANDLER_RETURN (rc, comm, rc, FUNC_NAME);
OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
}
......@@ -27,88 +27,86 @@
#include "ompi_config.h"
#include <stdio.h>
#include "opal/util/show_help.h"
#include "opal/util/printf.h"
#include "opal/util/show_help.h"
#include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/params.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/communicator/communicator.h"
#include "ompi/dpm/dpm.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/info/info.h"
#include "ompi/dpm/dpm.h"
#include "ompi/memchecker.h"
#include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/params.h"
#if OMPI_BUILD_MPI_PROFILING
#if OPAL_HAVE_WEAK_SYMBOLS
#pragma weak MPI_Comm_spawn_multiple = PMPI_Comm_spawn_multiple
#endif
#define MPI_Comm_spawn_multiple PMPI_Comm_spawn_multiple
# if OPAL_HAVE_WEAK_SYMBOLS
# pragma weak MPI_Comm_spawn_multiple = PMPI_Comm_spawn_multiple
# endif
# define MPI_Comm_spawn_multiple PMPI_Comm_spawn_multiple
#endif
static const char FUNC_NAME[] = "MPI_Comm_spawn_multiple";
int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_of_argv[],
const int array_of_maxprocs[], const MPI_Info array_of_info[],
int root, MPI_Comm comm, MPI_Comm *intercomm,
int array_of_errcodes[])
const int array_of_maxprocs[], const MPI_Info array_of_info[], int root,
MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[])
{
int i=0, rc=0, rank=0, size=0, flag;
ompi_communicator_t *newcomp=MPI_COMM_NULL;
bool send_first=false; /* they are contacting us first */
char port_name[MPI_MAX_PORT_NAME]; char *port_string = NULL;
printf("MPI A\n");
fflush(stdout);
int i = 0, rc = 0, rank = 0, size = 0, flag;
ompi_communicator_t *newcomp = MPI_COMM_NULL;
bool send_first = false; /* they are contacting us first */
char port_name[MPI_MAX_PORT_NAME];
char *port_string = NULL;
bool non_mpi = false, cumulative = false;
MEMCHECKER(
memchecker_comm(comm);
);
MEMCHECKER(memchecker_comm(comm););
if ( MPI_PARAM_CHECK ) {
if (MPI_PARAM_CHECK) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
if ( ompi_comm_invalid (comm)) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM,
FUNC_NAME);
if (ompi_comm_invalid(comm)) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, FUNC_NAME);
}
if ( OMPI_COMM_IS_INTER(comm)) {
if (OMPI_COMM_IS_INTER(comm)) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, FUNC_NAME);
}
if ( (0 > root) || (ompi_comm_size(comm) <= root) ) {
if ((0 > root) || (ompi_comm_size(comm) <= root)) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
if ( NULL == intercomm ) {
if (NULL == intercomm) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
}
rank = ompi_comm_rank ( comm );
if ( MPI_PARAM_CHECK ) {
if ( rank == root ) {
if ( 0 > count ) {
printf("MPI B\n");
fflush(stdout);
rank = ompi_comm_rank(comm);
if (MPI_PARAM_CHECK) {
if (rank == root) {
if (0 > count) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
if ( NULL == array_of_commands ) {
if (NULL == array_of_commands) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
if ( NULL == array_of_maxprocs ) {
if (NULL == array_of_maxprocs) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
if ( NULL == array_of_info ) {
if (NULL == array_of_info) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_INFO, FUNC_NAME);
}
for (i = 0; i < count; ++i) {
if (NULL == array_of_info[i] ||
ompi_info_is_freed(array_of_info[i])) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO,
FUNC_NAME);
if (NULL == array_of_info[i] || ompi_info_is_freed(array_of_info[i])) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, FUNC_NAME);
}
/* If ompi_non_mpi is set to true on any info, it must
be set to true on all of them. Note that not
setting ompi_non_mpi is the same as setting it to
false. */
ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi,
&flag);
ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag);
if (flag && 0 == i) {
/* If this is the first info, save its
ompi_non_mpi value */
......@@ -119,32 +117,35 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o
/* If this info's effective value doesn't agree with
the rest of them, error */
if (cumulative != non_mpi) {
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(
MPI_ERR_INFO,
FUNC_NAME);
return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, FUNC_NAME);
}
}
for ( i=0; i<count; i++ ) {
if ( NULL == array_of_commands[i] ) {
for (i = 0; i < count; i++) {
if (NULL == array_of_commands[i]) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
if ( 0 > array_of_maxprocs[i] ) {
if (0 > array_of_maxprocs[i]) {
return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME);
}
}
}
}
printf("MPI C\n");
fflush(stdout);
if (!ompi_mpi_dynamics_is_enabled(FUNC_NAME)) {
return OMPI_ERRHANDLER_INVOKE(comm, OMPI_ERR_NOT_SUPPORTED, FUNC_NAME);
}
printf("MPI D\n");
fflush(stdout);
if (rank == root) {
if (MPI_INFO_NULL == array_of_info[0]) {
non_mpi = false;
} else {
ompi_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi,
&flag);
ompi_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, &flag);
if (!flag) {
non_mpi = false;
}
......@@ -152,7 +153,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o
}
#if OPAL_ENABLE_FT_MPI
if( OPAL_UNLIKELY(!ompi_comm_iface_coll_check(comm, &rc)) ) {
if (OPAL_UNLIKELY(!ompi_comm_iface_coll_check(comm, &rc))) {
return OMPI_ERRHANDLER_INVOKE(comm, rc, FUNC_NAME);
}
#endif
......@@ -160,12 +161,14 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o
/* initialize the port name to avoid problems */
memset(port_name, 0, MPI_MAX_PORT_NAME);
printf("MPI E\n");
fflush(stdout);
if ( rank == root ) {
if (rank == root) {
if (!non_mpi) {
/* Open a port. The port_name is passed as an environment
variable to the children. */
if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) {
if (OMPI_SUCCESS != (rc = ompi_dpm_open_port(port_name))) {
goto error;
}
} else if (1 < ompi_comm_size(comm)) {
......@@ -173,41 +176,46 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o
rc = OMPI_ERR_NOT_SUPPORTED;
goto error;
}
if (OMPI_SUCCESS != (rc = ompi_dpm_spawn(count, (const char **) array_of_commands,
array_of_argv, array_of_maxprocs,
array_of_info, port_name))) {
if (OMPI_SUCCESS
!= (rc = ompi_dpm_spawn(count, (const char **) array_of_commands, array_of_argv,
array_of_maxprocs, array_of_info, port_name))) {
goto error;
}
}
printf("MPI F\n");
fflush(stdout);
error:
printf("MPI G\n");
fflush(stdout);
if (OMPI_SUCCESS != rc) {
/* There was an error in one of the above stages,
* we still need to do the connect_accept stage so that
* non-root ranks do not deadlock.
* Add the error code to the port string for connect_accept
* to propagate the error code. */
(void)opal_asprintf(&port_string, "%s:error=%d", port_name, rc);
}
else {
(void) opal_asprintf(&port_string, "%s:error=%d", port_name, rc);
} else {
port_string = port_name;
}
if (non_mpi) {
newcomp = MPI_COMM_NULL;
} else {
rc = ompi_dpm_connect_accept (comm, root, port_string, send_first, &newcomp);
rc = ompi_dpm_connect_accept(comm, root, port_string, send_first, &newcomp);
}
printf("MPI H\n");
fflush(stdout);
if (OPAL_ERR_NOT_SUPPORTED == rc) {
opal_show_help("help-mpi-api.txt",
"MPI function not supported",
true,
FUNC_NAME,
opal_show_help("help-mpi-api.txt", "MPI function not supported", true, FUNC_NAME,
"Underlying runtime environment does not support spawn functionality");
}
if(port_string != port_name) {
if (port_string != port_name) {
free(port_string);
}
......@@ -216,21 +224,26 @@ error:
ompi_dpm_close_port(port_name);
}
printf("MPI I\n");
fflush(stdout);
/* set array of errorcodes */
if (MPI_ERRCODES_IGNORE != array_of_errcodes) {
if (MPI_COMM_NULL != newcomp) {
size = newcomp->c_remote_group->grp_proc_count;
} else {
for ( i=0; i < count; i++) {
for (i = 0; i < count; i++) {
size = size + array_of_maxprocs[i];
}
}
for ( i=0; i < size; i++ ) {
array_of_errcodes[i]=rc;
for (i = 0; i < size; i++) {
array_of_errcodes[i] = rc;
}
}
printf("MPI J\n");
fflush(stdout);
*intercomm = newcomp;
OMPI_ERRHANDLER_RETURN (rc, comm, rc, FUNC_NAME);
OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
}
......@@ -20,33 +20,40 @@
#include "ompi_config.h"
#include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/params.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/ompi_spc.h"
#include "ompi/runtime/params.h"
#if OMPI_BUILD_MPI_PROFILING
#if OPAL_HAVE_WEAK_SYMBOLS
#pragma weak MPI_Finalize = PMPI_Finalize
#endif
#define MPI_Finalize PMPI_Finalize
# if OPAL_HAVE_WEAK_SYMBOLS
# pragma weak MPI_Finalize = PMPI_Finalize
# endif
# define MPI_Finalize PMPI_Finalize
#endif
static const char FUNC_NAME[] = "MPI_Finalize";
int MPI_Finalize(void)
{
/* If --with-spc and ompi_mpi_spc_dump_enabled were specified, print
* all of the final SPC values aggregated across the whole MPI run.
* Also, free all SPC memory.
*/
printf("FIN 1\n");
fflush(stdout);
SPC_FINI();
printf("FIN 2\n");
fflush(stdout);
if (MPI_PARAM_CHECK) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
}
printf("FIN 3\n");
fflush(stdout);
/* Pretty simple */
return ompi_mpi_finalize();
......
......@@ -35,67 +35,67 @@
#include "ompi_config.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
# include <sys/types.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
# include <unistd.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
# include <sys/param.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
# include <netdb.h>
#endif
#include "opal/util/event.h"
#include "opal/util/output.h"
#include "opal/runtime/opal_progress.h"
#include "opal/mca/allocator/base/base.h"
#include "opal/mca/base/base.h"
#include "opal/sys/atomic.h"
#include "opal/runtime/opal.h"
#include "opal/util/show_help.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/base/mpool_base_tree.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/allocator/base/base.h"
#include "opal/mca/pmix/pmix-internal.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_progress.h"
#include "opal/sys/atomic.h"
#include "opal/util/event.h"
#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/timings.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/errhandler/errcode.h"
#include "ompi/attribute/attribute.h"
#include "ompi/communicator/communicator.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/message/message.h"
#include "ompi/op/op.h"
#include "ompi/dpm/dpm.h"
#include "ompi/errhandler/errcode.h"
#include "ompi/file/file.h"
#include "ompi/info/info.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/attribute/attribute.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/bml/base/base.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/part/base/base.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/runtime/ompi_rte.h"
#include "ompi/mca/topo/base/base.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "ompi/mca/hook/base/base.h"
#include "ompi/mca/io/base/base.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/part/base/base.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/pml/base/pml_base_bsend.h"
#include "ompi/runtime/params.h"
#include "ompi/dpm/dpm.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/topo/base/base.h"
#include "ompi/message/message.h"
#include "ompi/mpiext/mpiext.h"
#include "ompi/mca/hook/base/base.h"
#include "ompi/op/op.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/ompi_rte.h"
#include "ompi/runtime/params.h"
extern bool ompi_enable_timing;
static void fence_cbfunc(pmix_status_t status, void *cbdata)
{
volatile bool *active = (volatile bool*)cbdata;
volatile bool *active = (volatile bool *) cbdata;
OPAL_ACQUIRE_OBJECT(active);
*active = false;
OPAL_POST_OBJECT(active);
......@@ -105,18 +105,20 @@ int ompi_mpi_finalize(void)
{
int ret = MPI_SUCCESS;
opal_list_item_t *item;
ompi_proc_t** procs;
ompi_proc_t **procs;
size_t nprocs;
volatile bool active;
uint32_t key;
ompi_datatype_t * datatype;
ompi_datatype_t *datatype;
pmix_status_t rc;
printf("FIN 4\n");
fflush(stdout);
ompi_hook_base_mpi_finalize_top();
int32_t state = ompi_mpi_state;
if (state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
if (state < OMPI_MPI_STATE_INIT_COMPLETED || state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
/* Note that if we're not initialized or already finalized, we
cannot raise an MPI error. The best that we can do is
write something to stderr. */
......@@ -125,13 +127,11 @@ int ompi_mpi_finalize(void)
hostname = opal_gethostname();
if (state < OMPI_MPI_STATE_INIT_COMPLETED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_finalize: not initialized",
true, hostname, pid);
opal_show_help("help-mpi-runtime.txt", "mpi_finalize: not initialized", true, hostname,
pid);
} else if (state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_finalize:invoked_multiple_times",
true, hostname, pid);
opal_show_help("help-mpi-runtime.txt", "mpi_finalize:invoked_multiple_times", true,
hostname, pid);
}
return MPI_ERR_OTHER;
}
......@@ -140,36 +140,39 @@ int ompi_mpi_finalize(void)
ompi_mpiext_fini();
printf("FIN 5\n");
fflush(stdout);
/* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing
anything else in MPI_FINALIZE (to include setting up such that
MPI_FINALIZED will return true). */
if (NULL != ompi_mpi_comm_self.comm.c_keyhash) {
ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_self,
ompi_mpi_comm_self.comm.c_keyhash);
ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_self, ompi_mpi_comm_self.comm.c_keyhash);
OBJ_RELEASE(ompi_mpi_comm_self.comm.c_keyhash);
ompi_mpi_comm_self.comm.c_keyhash = NULL;
}
#if OPAL_ENABLE_FT_MPI
if( ompi_ftmpi_enabled ) {
ompi_communicator_t* comm = &ompi_mpi_comm_world.comm;
OPAL_OUTPUT_VERBOSE((50, ompi_ftmpi_output_handle, "FT: Rank %d entering finalize", ompi_comm_rank(comm)));
if (ompi_ftmpi_enabled) {
ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
OPAL_OUTPUT_VERBOSE(
(50, ompi_ftmpi_output_handle, "FT: Rank %d entering finalize", ompi_comm_rank(comm)));
/* grpcomm barrier does not tolerate /new/ failures. Let's make sure
* we drain all preexisting failures before we proceed;
* TODO: when we have better failure support in the runtime, we can
* remove that agreement */
ompi_communicator_t* ncomm;
ompi_communicator_t *ncomm;
ret = ompi_comm_shrink_internal(comm, &ncomm);
if( MPI_SUCCESS != ret ) {
if (MPI_SUCCESS != ret) {
OMPI_ERROR_LOG(ret);
goto done;
}
/* do a barrier with closest neighbors in the ring, using doublering as
* it is synchronous and will help flush all past communications */
ret = ompi_coll_base_barrier_intra_doublering(ncomm, ncomm->c_coll->coll_barrier_module);
if( MPI_SUCCESS != ret ) {
if (MPI_SUCCESS != ret) {
OMPI_ERROR_LOG(ret);
goto done;
}
......@@ -178,12 +181,14 @@ int ompi_mpi_finalize(void)
/* finalize the fault tolerant infrastructure (revoke,
* failure propagator, etc). From now-on we do not tolerate new failures. */
OPAL_OUTPUT_VERBOSE((50, ompi_ftmpi_output_handle, "FT: Rank %05d turning off FT", ompi_comm_rank(comm)));
OPAL_OUTPUT_VERBOSE(
(50, ompi_ftmpi_output_handle, "FT: Rank %05d turning off FT", ompi_comm_rank(comm)));
ompi_comm_failure_detector_finalize();
ompi_comm_failure_propagator_finalize();
ompi_comm_revoke_finalize();
ompi_comm_rbcast_finalize();
opal_output_verbose(40, ompi_ftmpi_output_handle, "Rank %05d: DONE WITH FINALIZE", ompi_comm_rank(comm));
opal_output_verbose(40, ompi_ftmpi_output_handle, "Rank %05d: DONE WITH FINALIZE",
ompi_comm_rank(comm));
}
#endif /* OPAL_ENABLE_FT_MPI */
......@@ -193,18 +198,20 @@ int ompi_mpi_finalize(void)
COMM_SELF is destroyed / all the attribute callbacks have been
invoked) */
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state,
OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
/* As finalize is the last legal MPI call, we are allowed to force the release
* of the user buffer used for bsend, before going anywhere further.
*/
(void)mca_pml_base_bsend_detach(NULL, NULL);
(void) mca_pml_base_bsend_detach(NULL, NULL);
#if OPAL_ENABLE_PROGRESS_THREADS == 0
opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK);
#endif
printf("FIN 6\n");
fflush(stdout);
/* Redo ORTE calling opal_progress_event_users_increment() during
MPI lifetime, to get better latency when not using TCP */
opal_progress_event_users_increment();
......@@ -293,7 +300,8 @@ int ompi_mpi_finalize(void)
* communications/actions to complete. See
* https://github.com/open-mpi/ompi/issues/1576 for the
* original bug report. */
if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_cbfunc, (void*)&active))) {
if (PMIX_SUCCESS
!= (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_cbfunc, (void *) &active))) {
ret = opal_pmix_convert_status(rc);
OMPI_ERROR_LOG(ret);
/* Reset the active flag to false, to avoid waiting for
......@@ -303,6 +311,9 @@ int ompi_mpi_finalize(void)
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
}
printf("FIN 7\n");
fflush(stdout);
/* Shut down any bindings-specific issues: C++, F77, F90 */
/* Remove all memory associated by MPI_REGISTER_DATAREP (per
......@@ -315,16 +326,19 @@ int ompi_mpi_finalize(void)
OBJ_DESTRUCT(&ompi_registered_datareps);
/* Remove all F90 types from the hash tables */
OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_integer_hashtable)
OPAL_HASH_TABLE_FOREACH (key, uint32, datatype, &ompi_mpi_f90_integer_hashtable)
OBJ_RELEASE(datatype);
OBJ_DESTRUCT(&ompi_mpi_f90_integer_hashtable);
OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_real_hashtable)
OPAL_HASH_TABLE_FOREACH (key, uint32, datatype, &ompi_mpi_f90_real_hashtable)
OBJ_RELEASE(datatype);
OBJ_DESTRUCT(&ompi_mpi_f90_real_hashtable);
OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_complex_hashtable)
OPAL_HASH_TABLE_FOREACH (key, uint32, datatype, &ompi_mpi_f90_complex_hashtable)
OBJ_RELEASE(datatype);
OBJ_DESTRUCT(&ompi_mpi_f90_complex_hashtable);
printf("FIN 7a\n");
fflush(stdout);
/* Free communication objects */
/* free file resources */
......@@ -332,6 +346,9 @@ int ompi_mpi_finalize(void)
goto done;
}
printf("FIN 7b\n");
fflush(stdout);
/* free window resources */
if (OMPI_SUCCESS != (ret = ompi_win_finalize())) {
goto done;
......@@ -343,6 +360,8 @@ int ompi_mpi_finalize(void)
goto done;
}
printf("FIN 7c\n");
fflush(stdout);
/* free communicator resources. this MUST come before finalizing the PML
* as this will call into the pml */
......@@ -350,16 +369,19 @@ int ompi_mpi_finalize(void)
goto done;
}
printf("FIN 8\n");
fflush(stdout);
/* call del_procs on all allocated procs even though some may not be known
* to the pml layer. the pml layer is expected to be resilient and ignore
* any unknown procs. */
nprocs = 0;
procs = ompi_proc_get_allocated (&nprocs);
procs = ompi_proc_get_allocated(&nprocs);
MCA_PML_CALL(del_procs(procs, nprocs));
free(procs);
/* free pml resource */
if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) {
if (OMPI_SUCCESS != (ret = mca_pml_base_finalize())) {
goto done;
}
......@@ -380,7 +402,7 @@ int ompi_mpi_finalize(void)
/* Now that all MPI objects dealing with communications are gone,
shut down MCA types having to do with communications */
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework) ) ) {
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework))) {
OMPI_ERROR_LOG(ret);
goto done;
}
......@@ -401,7 +423,7 @@ int ompi_mpi_finalize(void)
}
/* finalize the DPM subsystem */
if ( OMPI_SUCCESS != (ret = ompi_dpm_finalize())) {
if (OMPI_SUCCESS != (ret = ompi_dpm_finalize())) {
goto done;
}
......@@ -420,6 +442,9 @@ int ompi_mpi_finalize(void)
goto done;
}
printf("FIN 9\n");
fflush(stdout);
/* Free all other resources */
/* free op resources */
......@@ -472,8 +497,11 @@ int ompi_mpi_finalize(void)
goto done;
}
printf("FIN 10\n");
fflush(stdout);
/* free proc resources */
if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) {
if (OMPI_SUCCESS != (ret = ompi_proc_finalize())) {
goto done;
}
......@@ -494,7 +522,7 @@ int ompi_mpi_finalize(void)
ompi_rte_initialized = false;
/* Now close the hook framework */
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_hook_base_framework) ) ) {
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_hook_base_framework))) {
OMPI_ERROR_LOG(ret);
goto done;
}
......@@ -516,11 +544,19 @@ int ompi_mpi_finalize(void)
/* All done */
done:
printf("FIN 11\n");
fflush(stdout);
done:
printf("FIN 12\n");
fflush(stdout);
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_COMPLETED);
ompi_hook_base_mpi_finalize_bottom();
printf("FIN 13\n");
fflush(stdout);
return ret;
}
......@@ -37,79 +37,78 @@
#include "ompi_config.h"
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif /* HAVE_SYS_TIME_H */
# include <sys/time.h>
#endif /* HAVE_SYS_TIME_H */
#include <pthread.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
# include <unistd.h>
#endif
#include "mpi.h"
#include "opal/class/opal_list.h"
#include "opal/mca/allocator/base/base.h"
#include "opal/mca/base/base.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/runtime/opal_progress.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/threads/threads.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_progress.h"
#include "opal/util/arch.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/error.h"
#include "opal/util/stacktrace.h"
#include "opal/util/show_help.h"
#include "opal/runtime/opal.h"
#include "opal/util/event.h"
#include "opal/mca/allocator/base/base.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/util/timings.h"
#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/stacktrace.h"
#include "opal/util/timings.h"
#include "ompi/constants.h"
#include "ompi/mpi/fortran/base/constants.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/params.h"
#include "ompi/attribute/attribute.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"
#include "ompi/constants.h"
#include "ompi/debuggers/debuggers.h"
#include "ompi/dpm/dpm.h"
#include "ompi/errhandler/errcode.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/interlib/interlib.h"
#include "ompi/request/request.h"
#include "ompi/message/message.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/file/file.h"
#include "ompi/attribute/attribute.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/info/info.h"
#include "ompi/interlib/interlib.h"
#include "ompi/mca/bml/base/base.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/part/base/base.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/hook/base/base.h"
#include "ompi/mca/io/base/base.h"
#include "ompi/runtime/ompi_rte.h"
#include "ompi/debuggers/debuggers.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/part/base/base.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/pml/base/pml_base_bsend.h"
#include "ompi/dpm/dpm.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/message/message.h"
#include "ompi/mpi/fortran/base/constants.h"
#include "ompi/mpiext/mpiext.h"
#include "ompi/mca/hook/base/base.h"
#include "ompi/op/op.h"
#include "ompi/proc/proc.h"
#include "ompi/request/request.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/ompi_rte.h"
#include "ompi/runtime/params.h"
#include "ompi/util/timings.h"
/* newer versions of gcc have poisoned this deprecated feature */
#ifdef HAVE___MALLOC_INITIALIZE_HOOK
#include "opal/mca/memory/base/base.h"
# include "opal/mca/memory/base/base.h"
/* So this sucks, but with OPAL in its own library that is brought in
implicity from libmpi, there are times when the malloc initialize
hook in the memory component doesn't work. So we have to do it
from here, since any MPI code is going to call MPI_Init... */
OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) =
opal_memory_base_malloc_init_hook;
OPAL_DECLSPEC void (*__malloc_initialize_hook)(void) = opal_memory_base_malloc_init_hook;
#endif
/* This is required for the boundaries of the hash tables used to store
......@@ -118,9 +117,9 @@ OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) =
#include <float.h>
#if OPAL_CC_USE_PRAGMA_IDENT
#pragma ident OMPI_IDENT_STRING
# pragma ident OMPI_IDENT_STRING
#elif OPAL_CC_USE_IDENT
#ident OMPI_IDENT_STRING
# ident OMPI_IDENT_STRING
#endif
const char ompi_version_string[] = OMPI_IDENT_STRING;
......@@ -144,37 +143,35 @@ opal_thread_t *ompi_mpi_main_thread = NULL;
*/
ompi_predefined_datatype_t *ompi_mpi_character_addr = &ompi_mpi_character;
ompi_predefined_datatype_t *ompi_mpi_logical_addr = &ompi_mpi_logical;
ompi_predefined_datatype_t *ompi_mpi_logical1_addr = &ompi_mpi_logical1;
ompi_predefined_datatype_t *ompi_mpi_logical2_addr = &ompi_mpi_logical2;
ompi_predefined_datatype_t *ompi_mpi_logical4_addr = &ompi_mpi_logical4;
ompi_predefined_datatype_t *ompi_mpi_logical8_addr = &ompi_mpi_logical8;
ompi_predefined_datatype_t *ompi_mpi_integer_addr = &ompi_mpi_integer;
ompi_predefined_datatype_t *ompi_mpi_integer1_addr = &ompi_mpi_integer1;
ompi_predefined_datatype_t *ompi_mpi_integer2_addr = &ompi_mpi_integer2;
ompi_predefined_datatype_t *ompi_mpi_integer4_addr = &ompi_mpi_integer4;
ompi_predefined_datatype_t *ompi_mpi_integer8_addr = &ompi_mpi_integer8;
ompi_predefined_datatype_t *ompi_mpi_logical_addr = &ompi_mpi_logical;
ompi_predefined_datatype_t *ompi_mpi_logical1_addr = &ompi_mpi_logical1;
ompi_predefined_datatype_t *ompi_mpi_logical2_addr = &ompi_mpi_logical2;
ompi_predefined_datatype_t *ompi_mpi_logical4_addr = &ompi_mpi_logical4;
ompi_predefined_datatype_t *ompi_mpi_logical8_addr = &ompi_mpi_logical8;
ompi_predefined_datatype_t *ompi_mpi_integer_addr = &ompi_mpi_integer;
ompi_predefined_datatype_t *ompi_mpi_integer1_addr = &ompi_mpi_integer1;
ompi_predefined_datatype_t *ompi_mpi_integer2_addr = &ompi_mpi_integer2;
ompi_predefined_datatype_t *ompi_mpi_integer4_addr = &ompi_mpi_integer4;
ompi_predefined_datatype_t *ompi_mpi_integer8_addr = &ompi_mpi_integer8;
ompi_predefined_datatype_t *ompi_mpi_integer16_addr = &ompi_mpi_integer16;
ompi_predefined_datatype_t *ompi_mpi_real_addr = &ompi_mpi_real;
ompi_predefined_datatype_t *ompi_mpi_real2_addr = &ompi_mpi_real2;
ompi_predefined_datatype_t *ompi_mpi_real4_addr = &ompi_mpi_real4;
ompi_predefined_datatype_t *ompi_mpi_real8_addr = &ompi_mpi_real8;
ompi_predefined_datatype_t *ompi_mpi_real16_addr = &ompi_mpi_real16;
ompi_predefined_datatype_t *ompi_mpi_dblprec_addr = &ompi_mpi_dblprec;
ompi_predefined_datatype_t *ompi_mpi_cplex_addr = &ompi_mpi_cplex;
ompi_predefined_datatype_t *ompi_mpi_complex4_addr = &ompi_mpi_complex4;
ompi_predefined_datatype_t *ompi_mpi_complex8_addr = &ompi_mpi_complex8;
ompi_predefined_datatype_t *ompi_mpi_real_addr = &ompi_mpi_real;
ompi_predefined_datatype_t *ompi_mpi_real2_addr = &ompi_mpi_real2;
ompi_predefined_datatype_t *ompi_mpi_real4_addr = &ompi_mpi_real4;
ompi_predefined_datatype_t *ompi_mpi_real8_addr = &ompi_mpi_real8;
ompi_predefined_datatype_t *ompi_mpi_real16_addr = &ompi_mpi_real16;
ompi_predefined_datatype_t *ompi_mpi_dblprec_addr = &ompi_mpi_dblprec;
ompi_predefined_datatype_t *ompi_mpi_cplex_addr = &ompi_mpi_cplex;
ompi_predefined_datatype_t *ompi_mpi_complex4_addr = &ompi_mpi_complex4;
ompi_predefined_datatype_t *ompi_mpi_complex8_addr = &ompi_mpi_complex8;
ompi_predefined_datatype_t *ompi_mpi_complex16_addr = &ompi_mpi_complex16;
ompi_predefined_datatype_t *ompi_mpi_complex32_addr = &ompi_mpi_complex32;
ompi_predefined_datatype_t *ompi_mpi_dblcplex_addr = &ompi_mpi_dblcplex;
ompi_predefined_datatype_t *ompi_mpi_2real_addr = &ompi_mpi_2real;
ompi_predefined_datatype_t *ompi_mpi_2dblprec_addr = &ompi_mpi_2dblprec;
ompi_predefined_datatype_t *ompi_mpi_2integer_addr = &ompi_mpi_2integer;
ompi_predefined_datatype_t *ompi_mpi_dblcplex_addr = &ompi_mpi_dblcplex;
ompi_predefined_datatype_t *ompi_mpi_2real_addr = &ompi_mpi_2real;
ompi_predefined_datatype_t *ompi_mpi_2dblprec_addr = &ompi_mpi_2dblprec;
ompi_predefined_datatype_t *ompi_mpi_2integer_addr = &ompi_mpi_2integer;
struct ompi_status_public_t *ompi_mpi_status_ignore_addr =
(ompi_status_public_t *) 0;
struct ompi_status_public_t *ompi_mpi_statuses_ignore_addr =
(ompi_status_public_t *) 0;
struct ompi_status_public_t *ompi_mpi_status_ignore_addr = (ompi_status_public_t *) 0;
struct ompi_status_public_t *ompi_mpi_statuses_ignore_addr = (ompi_status_public_t *) 0;
/*
* These variables are here, rather than under ompi/mpi/c/foo.c
......@@ -214,26 +211,25 @@ struct ompi_status_public_t *ompi_mpi_statuses_ignore_addr =
* complain.
*/
#if OMPI_BUILD_FORTRAN_BINDINGS
# if OMPI_FORTRAN_CAPS
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUS_IGNORE;
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUSES_IGNORE;
# elif OMPI_FORTRAN_PLAIN
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore;
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore;
# elif OMPI_FORTRAN_SINGLE_UNDERSCORE
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore_;
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore_;
# elif OMPI_FORTRAN_DOUBLE_UNDERSCORE
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore__;
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore__;
# else
# error Unrecognized Fortran name mangling scheme
# endif
# if OMPI_FORTRAN_CAPS
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint *) &MPI_FORTRAN_STATUS_IGNORE;
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint *) &MPI_FORTRAN_STATUSES_IGNORE;
# elif OMPI_FORTRAN_PLAIN
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint *) &mpi_fortran_status_ignore;
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint *) &mpi_fortran_statuses_ignore;
# elif OMPI_FORTRAN_SINGLE_UNDERSCORE
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint *) &mpi_fortran_status_ignore_;
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint *) &mpi_fortran_statuses_ignore_;
# elif OMPI_FORTRAN_DOUBLE_UNDERSCORE
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint *) &mpi_fortran_status_ignore__;
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint *) &mpi_fortran_statuses_ignore__;
# else
# error Unrecognized Fortran name mangling scheme
# endif
#else
MPI_Fint *MPI_F_STATUS_IGNORE = NULL;
MPI_Fint *MPI_F_STATUSES_IGNORE = NULL;
#endif /* OMPI_BUILD_FORTRAN_BINDINGS */
#endif /* OMPI_BUILD_FORTRAN_BINDINGS */
/* Constants for the Fortran layer. These values are referred to via
common blocks in the Fortran equivalents. See
......@@ -286,29 +282,25 @@ extern int ompi_mpi_event_tick_rate;
* Static functions used to configure the interactions between the OPAL and
* the runtime.
*/
static char*
_process_name_print_for_opal(const opal_process_name_t procname)
static char *_process_name_print_for_opal(const opal_process_name_t procname)
{
ompi_process_name_t* rte_name = (ompi_process_name_t*)&procname;
ompi_process_name_t *rte_name = (ompi_process_name_t *) &procname;
return OMPI_NAME_PRINT(rte_name);
}
static int
_process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2)
static int _process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2)
{
ompi_process_name_t* o1 = (ompi_process_name_t*)&p1;
ompi_process_name_t* o2 = (ompi_process_name_t*)&p2;
ompi_process_name_t *o1 = (ompi_process_name_t *) &p1;
ompi_process_name_t *o2 = (ompi_process_name_t *) &p2;
return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, o1, o2);
}
static int _convert_string_to_process_name(opal_process_name_t *name,
const char* name_string)
static int _convert_string_to_process_name(opal_process_name_t *name, const char *name_string)
{
return ompi_rte_convert_string_to_process_name(name, name_string);
}
static int _convert_process_name_to_string(char** name_string,
const opal_process_name_t *name)
static int _convert_process_name_to_string(char **name_string, const opal_process_name_t *name)
{
return ompi_rte_convert_process_name_to_string(name_string, name);
}
......@@ -332,8 +324,7 @@ void ompi_mpi_thread_level(int requested, int *provided)
ompi_mpi_main_thread = opal_thread_get_self();
}
ompi_mpi_thread_multiple = (ompi_mpi_thread_provided ==
MPI_THREAD_MULTIPLE);
ompi_mpi_thread_multiple = (ompi_mpi_thread_provided == MPI_THREAD_MULTIPLE);
}
static int ompi_register_mca_variables(void)
......@@ -352,10 +343,8 @@ static int ompi_register_mca_variables(void)
ompi_enable_timing = false;
(void) mca_base_var_register("ompi", "ompi", NULL, "timing",
"Request that critical timing loops be measured",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_enable_timing);
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &ompi_enable_timing);
#if OPAL_ENABLE_FT_MPI
/* Before loading any other part of the MPI library, we need to load
......@@ -363,7 +352,7 @@ static int ompi_register_mca_variables(void)
* FT is desired ON; this does override openmpi-params.conf, but not
* command line or env.
*/
if( ompi_ftmpi_enabled ) {
if (ompi_ftmpi_enabled) {
mca_base_var_load_extra_files("ft-mpi", false);
}
#endif /* OPAL_ENABLE_FT_MPI */
......@@ -373,34 +362,30 @@ static int ompi_register_mca_variables(void)
/*
 * Completion callback for PMIx_Fence_nb(): clears the caller's volatile
 * "active" flag (passed via cbdata) so the spin-wait in ompi_mpi_init
 * can proceed.  The acquire/post pair orders the flag update with respect
 * to the waiting thread's reads.
 */
static void fence_release(pmix_status_t status, void *cbdata)
{
    volatile bool *flag = (volatile bool *) cbdata;

    OPAL_ACQUIRE_OBJECT(flag);
    *flag = false;
    OPAL_POST_OBJECT(flag);
}
/*
 * Registration callback for PMIx_Register_event_handler(): stash the
 * registration status in the caller's lock object and wake the thread
 * blocked in OPAL_PMIX_WAIT_THREAD().  evhandler_ref is unused here.
 */
static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata)
{
    opal_pmix_lock_t *reglock = (opal_pmix_lock_t *) cbdata;

    reglock->status = status;
    OPAL_PMIX_WAKEUP_THREAD(reglock);
}
int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
bool reinit_ok)
int ompi_mpi_init(int argc, char **argv, int requested, int *provided, bool reinit_ok)
{
int ret;
ompi_proc_t** procs;
ompi_proc_t **procs;
size_t nprocs;
char *error = NULL;
volatile bool active;
bool background_fence = false;
pmix_info_t info[2];
pmix_status_t codes[1] = { PMIX_ERR_PROC_ABORTED };
pmix_status_t codes[1] = {PMIX_ERR_PROC_ABORTED};
pmix_status_t rc;
OMPI_TIMING_INIT(64);
opal_pmix_lock_t mylock;
......@@ -410,16 +395,14 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
/* Ensure that we were not already initialized or finalized. */
int32_t expected = OMPI_MPI_STATE_NOT_INITIALIZED;
int32_t desired = OMPI_MPI_STATE_INIT_STARTED;
int32_t desired = OMPI_MPI_STATE_INIT_STARTED;
opal_atomic_wmb();
if (!opal_atomic_compare_exchange_strong_32(&ompi_mpi_state, &expected,
desired)) {
if (!opal_atomic_compare_exchange_strong_32(&ompi_mpi_state, &expected, desired)) {
// If we failed to atomically transition ompi_mpi_state from
// NOT_INITIALIZED to INIT_STARTED, then someone else already
// did that, and we should return.
if (expected >= OMPI_MPI_STATE_FINALIZE_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init: already finalized", true);
opal_show_help("help-mpi-runtime.txt", "mpi_init: already finalized", true);
return MPI_ERR_OTHER;
} else if (expected >= OMPI_MPI_STATE_INIT_STARTED) {
// In some cases (e.g., oshmem_shmem_init()), we may call
......@@ -433,8 +416,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
return MPI_SUCCESS;
}
opal_show_help("help-mpi-runtime.txt",
"mpi_init: invoked multiple times", true);
opal_show_help("help-mpi-runtime.txt", "mpi_init: invoked multiple times", true);
return MPI_ERR_OTHER;
}
}
......@@ -508,7 +490,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
goto error;
}
if (OPAL_SUCCESS != (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) {
if (OPAL_SUCCESS
!= (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) {
error = "ompi_mpi_init: opal_arch_set_fortran_logical_size failed";
goto error;
}
......@@ -547,7 +530,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided);
OMPI_TIMING_NEXT("initialization");
/* Setup RTE */
......@@ -567,7 +549,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
/* give it a name so we can distinguish it */
PMIX_INFO_LOAD(&info[1], PMIX_EVENT_HDLR_NAME, "MPI-Default", PMIX_STRING);
OPAL_PMIX_CONSTRUCT_LOCK(&mylock);
PMIx_Register_event_handler(codes, 1, info, 2, ompi_errhandler_callback, evhandler_reg_callbk, (void*)&mylock);
PMIx_Register_event_handler(codes, 1, info, 2, ompi_errhandler_callback, evhandler_reg_callbk,
(void *) &mylock);
OPAL_PMIX_WAIT_THREAD(&mylock);
rc = mylock.status;
OPAL_PMIX_DESTRUCT_LOCK(&mylock);
......@@ -609,9 +592,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
error = "ompi_op_base_open() failed";
goto error;
}
if (OMPI_SUCCESS !=
(ret = ompi_op_base_find_available(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
if (OMPI_SUCCESS
!= (ret = ompi_op_base_find_available(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
error = "ompi_op_base_find_available() failed";
goto error;
}
......@@ -638,7 +621,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
error = "mca_bml_base_open() failed";
goto error;
}
if (OMPI_SUCCESS != (ret = mca_bml_base_init (1, ompi_mpi_thread_multiple))) {
if (OMPI_SUCCESS != (ret = mca_bml_base_init(1, ompi_mpi_thread_multiple))) {
error = "mca_bml_base_init() failed";
goto error;
}
......@@ -655,7 +638,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
error = "ompi_osc_base_open() failed";
goto error;
}
if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_part_base_framework, 0))) {
error = "ompi_part_base_open() failed";
goto error;
......@@ -669,9 +652,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
/* Select which MPI components to use */
if (OMPI_SUCCESS !=
(ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
if (OMPI_SUCCESS
!= (ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
error = "mca_pml_base_select() failed";
goto error;
}
......@@ -691,8 +673,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
}
OMPI_TIMING_NEXT("commit");
#if (OPAL_ENABLE_TIMING)
if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex &&
opal_pmix_collect_all_data && !ompi_singleton) {
if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && opal_pmix_collect_all_data
&& !ompi_singleton) {
if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) {
ret - opal_pmix_convert_status(rc);
error = "timing: pmix-barrier-1 failed";
......@@ -723,9 +705,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
active = true;
OPAL_POST_OBJECT(&active);
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0,
fence_release,
(void*)&active))) {
if (PMIX_SUCCESS
!= (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_release, (void *) &active))) {
ret = opal_pmix_convert_status(rc);
error = "PMIx_Fence_nb() failed";
goto error;
......@@ -739,8 +720,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
active = true;
OPAL_POST_OBJECT(&active);
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL);
rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active);
if( PMIX_SUCCESS != rc) {
rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void *) &active);
if (PMIX_SUCCESS != rc) {
ret = opal_pmix_convert_status(rc);
error = "PMIx_Fence() failed";
goto error;
......@@ -753,30 +734,27 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
OMPI_TIMING_NEXT("modex");
/* select buffered send allocator component to be used */
if( OMPI_SUCCESS !=
(ret = mca_pml_base_bsend_init(ompi_mpi_thread_multiple))) {
if (OMPI_SUCCESS != (ret = mca_pml_base_bsend_init(ompi_mpi_thread_multiple))) {
error = "mca_pml_base_bsend_init() failed";
goto error;
}
if (OMPI_SUCCESS !=
(ret = mca_coll_base_find_available(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
if (OMPI_SUCCESS
!= (ret = mca_coll_base_find_available(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
error = "mca_coll_base_find_available() failed";
goto error;
}
if (OMPI_SUCCESS !=
(ret = ompi_osc_base_find_available(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
if (OMPI_SUCCESS
!= (ret = ompi_osc_base_find_available(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
error = "ompi_osc_base_find_available() failed";
goto error;
}
if (OMPI_SUCCESS !=
(ret = mca_part_base_select(OPAL_ENABLE_PROGRESS_THREADS,
ompi_mpi_thread_multiple))) {
if (OMPI_SUCCESS
!= (ret = mca_part_base_select(OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) {
error = "mca_part_base_select() failed";
goto error;
}
......@@ -860,7 +838,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
/* start PML/BTL's */
ret = MCA_PML_CALL(enable(true));
if( OMPI_SUCCESS != ret ) {
if (OMPI_SUCCESS != ret) {
error = "PML control failed";
goto error;
}
......@@ -868,15 +846,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
/* some btls/mtls require we call add_procs with all procs in the job.
* since the btls/mtls have no visibility here it is up to the pml to
* convey this requirement */
if (mca_pml_base_requires_world ()) {
if (NULL == (procs = ompi_proc_world (&nprocs))) {
if (mca_pml_base_requires_world()) {
if (NULL == (procs = ompi_proc_world(&nprocs))) {
error = "ompi_proc_world () failed";
goto error;
}
} else {
/* add all allocated ompi_proc_t's to PML (below the add_procs limit this
* behaves identically to ompi_proc_world ()) */
if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) {
if (NULL == (procs = ompi_proc_get_allocated(&nprocs))) {
error = "ompi_proc_get_allocated () failed";
goto error;
}
......@@ -887,8 +865,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
Otherwise, if we got some other failure, fall through to print
a generic message. */
if (OMPI_ERR_UNREACH == ret) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init:startup:pml-add-procs-fail", true);
opal_show_help("help-mpi-runtime.txt", "mpi_init:startup:pml-add-procs-fail", true);
error = NULL;
goto error;
} else if (OMPI_SUCCESS != ret) {
......@@ -902,19 +879,23 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
#if OPAL_ENABLE_FT_MPI
/* initialize the fault tolerant infrastructure (revoke, detector,
* propagator) */
if( ompi_ftmpi_enabled ) {
if (ompi_ftmpi_enabled) {
const char *evmethod;
rc = ompi_comm_rbcast_init();
if( OMPI_SUCCESS != rc ) return rc;
if (OMPI_SUCCESS != rc)
return rc;
rc = ompi_comm_revoke_init();
if( OMPI_SUCCESS != rc ) return rc;
if (OMPI_SUCCESS != rc)
return rc;
rc = ompi_comm_failure_propagator_init();
if( OMPI_SUCCESS != rc ) return rc;
if (OMPI_SUCCESS != rc)
return rc;
rc = ompi_comm_failure_detector_init();
if( OMPI_SUCCESS != rc ) return rc;
if (OMPI_SUCCESS != rc)
return rc;
evmethod = event_base_get_method(opal_sync_event_base);
if( 0 == strcmp("select", evmethod) ) {
if (0 == strcmp("select", evmethod)) {
opal_show_help("help-mpi-ft.txt", "module:event:selectbug", true);
}
}
......@@ -924,8 +905,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
* Dump all MCA parameters if requested
*/
if (ompi_mpi_show_mca_params) {
ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank,
nprocs,
ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank, nprocs,
ompi_process_info.nodename);
}
......@@ -949,8 +929,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
active = true;
OPAL_POST_OBJECT(&active);
PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL);
if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1,
fence_release, (void*)&active))) {
if (PMIX_SUCCESS
!= (rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void *) &active))) {
ret = opal_pmix_convert_status(rc);
error = "PMIx_Fence_nb() failed";
goto error;
......@@ -994,7 +974,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
e.g. hierarch, might create subcommunicators. The threadlevel
requested by all processes is required in order to know
which cid allocation algorithm can be used. */
if (OMPI_SUCCESS != ( ret = ompi_comm_cid_init ())) {
if (OMPI_SUCCESS != (ret = ompi_comm_cid_init())) {
error = "ompi_mpi_init: ompi_comm_cid_init failed";
goto error;
}
......@@ -1003,14 +983,12 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
(since dpm.mark_dyncomm is not set in the communicator creation
function else), but before dpm.dyncom_init, since this function
might require collective for the CID allocation. */
if (OMPI_SUCCESS !=
(ret = mca_coll_base_comm_select(MPI_COMM_WORLD))) {
if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(MPI_COMM_WORLD))) {
error = "mca_coll_base_comm_select(MPI_COMM_WORLD) failed";
goto error;
}
if (OMPI_SUCCESS !=
(ret = mca_coll_base_comm_select(MPI_COMM_SELF))) {
if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(MPI_COMM_SELF))) {
error = "mca_coll_base_comm_select(MPI_COMM_SELF) failed";
goto error;
}
......@@ -1050,20 +1028,20 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
#if OPAL_ENABLE_FT_MPI
/* start the failure detector */
if( ompi_ftmpi_enabled ) {
if (ompi_ftmpi_enabled) {
rc = ompi_comm_failure_detector_start();
if( OMPI_SUCCESS != rc ) return rc;
if (OMPI_SUCCESS != rc)
return rc;
}
#endif
/* Fall through */
error:
error:
if (ret != OMPI_SUCCESS) {
/* Only print a message if one was not already printed */
if (NULL != error && OMPI_ERR_SILENT != ret) {
const char *err_msg = opal_strerror(ret);
opal_show_help("help-mpi-runtime.txt",
"mpi_init:startup:internal-failure", true,
opal_show_help("help-mpi-runtime.txt", "mpi_init:startup:internal-failure", true,
"MPI_INIT", "MPI_INIT", error, err_msg, ret);
}
ompi_hook_base_mpi_init_error(argc, argv, requested, provided);
......@@ -1077,13 +1055,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
/* Initialize the arrays used to store the F90 types returned by the
* MPI_Type_create_f90_XXX functions.
*/
OBJ_CONSTRUCT( &ompi_mpi_f90_integer_hashtable, opal_hash_table_t);
OBJ_CONSTRUCT(&ompi_mpi_f90_integer_hashtable, opal_hash_table_t);
opal_hash_table_init(&ompi_mpi_f90_integer_hashtable, 16 /* why not? */);
OBJ_CONSTRUCT( &ompi_mpi_f90_real_hashtable, opal_hash_table_t);
OBJ_CONSTRUCT(&ompi_mpi_f90_real_hashtable, opal_hash_table_t);
opal_hash_table_init(&ompi_mpi_f90_real_hashtable, FLT_MAX_10_EXP);
OBJ_CONSTRUCT( &ompi_mpi_f90_complex_hashtable, opal_hash_table_t);
OBJ_CONSTRUCT(&ompi_mpi_f90_complex_hashtable, opal_hash_table_t);
opal_hash_table_init(&ompi_mpi_f90_complex_hashtable, FLT_MAX_10_EXP);
/* All done. Wasn't that simple? */
......
......@@ -7,58 +7,42 @@
* Sample MPI "hello world" application in C
*/
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
#include "mpi.h"
#define BUFFLEN 64
extern char **environ;
/* Spawn Modes (dynamic job spawning):
* 1: Spawn just one process (in one job)
* 2: Spawn 2 processes in 2 different jobs
* 3: Spawn 2 prcoesses in one (shared) job
* 3: Spawn 3 processes in one (shared) job
* 0 or other: Do not spawn a dynamic process/job
*/
#define SPAWN_MODE 0
#define SPAWN_MODE 1
void errorExit(char* msg);
int main(int argc, char *argv[])
{
FILE *ptr = fopen("/home/test_out", "a");
fprintf(ptr, "TEST APP STARTED\n");
char **s = environ;
/*void notifyProcessAgent(pid_t pid, int rank, const char *eventInfo) {
struct sockaddr_un strAddr;
socklen_t lenAddr;
int fdSock;
if ((fdSock=socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
errorExit("socket");
}
strAddr.sun_family=AF_LOCAL; // Unix domain
strcpy(strAddr.sun_path, SOCKET_PATH);
lenAddr=sizeof(strAddr.sun_family)+strlen(strAddr.sun_path);
if (connect(fdSock, (struct sockaddr*)&strAddr, lenAddr) !=0 ) {
errorExit("connect");
// PRINT ENV
for (; *s; s++) {
fprintf(ptr, "%s\n", *s);
}
char info2Send[BUFFLEN];
snprintf(info2Send, BUFFLEN+1, "%s: %d, %d", eventInfo, pid, rank);
if (send(fdSock, info2Send, BUFFLEN+1, 0) < 0) {
errorExit("send");
char *parentPort = getenv("OMPI_PARENT_PORT");
if (parentPort) {
fprintf(ptr, "Hey we have a parent port! %s", parentPort);
}
printf("\nData send!\n");
char rank2Recv[BUFFLEN];
recv(fdSock, rank2Recv, BUFFLEN+1, 0);
int receivedNumber = (int)strtol(rank2Recv, NULL, 0);
printf("Received from server: %d\n", receivedNumber);
close(fdSock);
}*/
int main(int argc, char* argv[]) {
fclose(ptr);
int rank, size, len;
pid_t pid;
......@@ -67,80 +51,97 @@ int main(int argc, char* argv[]) {
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
pid = getpid();
//notifyProcessAgent(pid, rank, "Spawned");
// notifyProcessAgent(pid, rank, "Spawned");
printf("Hello, world, I am %d of %d, PID: %d\n", rank, size, pid);
fflush(stdout);
// dynamically spawn child process
// https://mpi.deino.net/mpi_functions/MPI_Comm_spawn.html
if (1 == SPAWN_MODE) {
int np = 1;
int errcodes[1];
int np = 2;
int errcodes[2];
MPI_Comm parentcomm, intercomm;
MPI_Comm_get_parent( &parentcomm );
MPI_Comm_get_parent(&parentcomm);
if (parentcomm == MPI_COMM_NULL) {
MPI_Comm_spawn( "hello", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0,
MPI_COMM_WORLD, &intercomm, errcodes );
MPI_Comm_spawn("/home/ompi/rank-swapper-agent/hello", MPI_ARGV_NULL, np, MPI_INFO_NULL,
0, MPI_COMM_WORLD, &intercomm, errcodes);
printf("I'm the parent.\n");
fflush(stdout);
} else {
printf("I'm the spawned.\n");
fflush(stdout);
}
if (0 != errcodes[0]) {
printf("ERROR_SPAWN: code: %d\n", errcodes[0]);
fflush(stdout);
}
// (instead of Comm_multiple) spawns a second, different intercommunicator
// (instead of Comm_multiple) spawns a second, different intercommunicator
} else if (2 == SPAWN_MODE) {
int np = 1;
int errcodes[1];
MPI_Comm parentcomm, intercomm;
MPI_Comm_get_parent( &parentcomm );
int np = 2;
int errcodes[2];
MPI_Comm parentcomm;
MPI_Comm intercomm[2];
MPI_Comm_get_parent(&parentcomm);
if (parentcomm == MPI_COMM_NULL) {
for (int i = 0; i < 2; i++) {
MPI_Comm_spawn( "hello", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0,
MPI_COMM_WORLD, &intercomm, errcodes );
MPI_Comm_spawn("/home/ompi/rank-swapper-agent/hello", MPI_ARGV_NULL, np,
MPI_INFO_NULL, 0, MPI_COMM_WORLD, &intercomm[i], errcodes);
if (0 != errcodes[0]) {
printf("ERROR_SPAWN: code: %d\n", errcodes[0]);
fflush(stdout);
}
}
printf("I'm the parent.\n");
fflush(stdout);
} else {
printf("I'm the spawned.\n");
fflush(stdout);
}
} else if (3 == SPAWN_MODE) {
int np[2] = { 1, 1 };
int errcodes[2];
int np[2] = {2, 1};
int errcodes[3];
MPI_Comm parentcomm, intercomm;
char *cmds[2] = { "hello", "hello" };
MPI_Info infos[2] = { MPI_INFO_NULL, MPI_INFO_NULL };
MPI_Comm_get_parent( &parentcomm );
char *cmds[3] = {"/home/ompi/rank-swapper-agent/hello",
"/home/ompi/rank-swapper-agent/hello",
"/home/ompi/rank-swapper-agent/hello"};
MPI_Info infos[3] = {MPI_INFO_NULL, MPI_INFO_NULL, MPI_INFO_NULL};
MPI_Comm_get_parent(&parentcomm);
if (parentcomm == MPI_COMM_NULL) {
// Create n more processes using the "hello" executable
MPI_Comm_spawn_multiple(2, cmds, MPI_ARGVS_NULL, np, infos, 0, MPI_COMM_WORLD, &intercomm, errcodes );
MPI_Comm_spawn_multiple(2, cmds, MPI_ARGVS_NULL, np, infos, 0, MPI_COMM_WORLD,
&intercomm, errcodes);
printf("I'm the parent.\n");
fflush(stdout);
} else {
printf("I'm the spawned.\n");
fflush(stdout);
}
for (int i = 0; i < 2; i++) {
for (int i = 0; i < 3; i++) {
if (0 != errcodes[i]) {
printf("ERROR_SPAWN: code: %d\n", errcodes[i]);
fflush(stdout);
}
}
}
sleep(5);
// printf("Sleeping.\n");
// fflush(stdout);
// sleep(5);
// printf("Done sleeping.\n");
// fflush(stdout);
MPI_Finalize();
//notifyProcessAgent(pid, rank, "Ended");
// notifyProcessAgent(pid, rank, "Ended");
return 0;
}