From d9655f8afb0b6e92f2a79e09e1b5a20cc975bcaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Pascal=20Becker?= <rene.becker2@gmx.de> Date: Tue, 5 Mar 2024 17:55:30 +0100 Subject: [PATCH] Add Debug Statements --- ompi/communicator/comm_init.c | 45 +- ompi/dpm/dpm.c | 679 +++++++++++++++++-------------- ompi/mpi/c/comm_spawn.c | 137 +++---- ompi/mpi/c/comm_spawn_multiple.c | 149 +++---- ompi/mpi/c/finalize.c | 21 +- ompi/runtime/ompi_mpi_finalize.c | 168 +++++--- ompi/runtime/ompi_mpi_init.c | 340 ++++++++-------- rank-swapper-agent/hello_c.c | 8 +- 8 files changed, 821 insertions(+), 726 deletions(-) diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index dc17a58041..036d84d09e 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -63,6 +63,7 @@ #define PMIX_DYNAMIC_ID_ENV_VAR "DPM_PMIX_DYNAMIC_ID" #define JOBID_ENV_VAR "SLURM_VRM_JOBID" #define SLURM_JOBID_ENV_VAR "SLURM_JOB_ID" +#define PMIX_NAMESPACE_ENV_VAR "PMIX_NAMESPACE" /* ** Table for Fortran <-> C communicator handle conversion @@ -167,31 +168,29 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, printf("TEST JobID: %s", slurm_jobid_str); } - // PMIx Dynamic Id - const char *pmix_id = getenv(PMIX_DYNAMIC_ID_ENV_VAR); - char pmix_dyn_id[sizeof(uint64_t) + 1] = ""; - char nspace[257] = ""; - if (NULL != pmix_id) { - size_t did = 0; - sscanf(pmix_id, "%zu", &did); - did += vpid; - sprintf(pmix_dyn_id, ",%zu", did); - printf("TEST PMIx Dynamic ID: %s", pmix_dyn_id); + // Dynamic Identifier + const char *pmix_id_offset = getenv(PMIX_DYNAMIC_ID_ENV_VAR); + char pmix_dynamic_id[128] = ""; + char pmix_namespace[257] = ""; // PMIx max namespace len + comma + if (NULL != pmix_id_offset) { + size_t dynamic_id = 0; + sscanf(pmix_id_offset, "%zu", &dynamic_id); + dynamic_id += vpid; + sprintf(pmix_dynamic_id, ",%zu", dynamic_id); // PMIx Namespace - const char *pmix_nspace = getenv("PMIX_NAMESPACE"); - if (NULL != pmix_nspace) { - char 
comma = ','; - strncat(nspace, &comma, 1); - strcat(nspace, pmix_nspace); - printf("TEST PMIx Namespace: %s", nspace); + const char *pmix_nspace_env = getenv(PMIX_NAMESPACE_ENV_VAR); + if (NULL != pmix_nspace_env) { + strncat(pmix_namespace, ",", 1); + strcat(pmix_namespace, pmix_nspace_env); } } char info_to_send[BUFFLEN]; memset(info_to_send, 0, BUFFLEN); snprintf(info_to_send, BUFFLEN, "{\"msg_type\": 128, \"msg_data\": \"%d,%u%s,%zu%s%s%s\"}", pid, - vpid, slurm_jobid_str, size, vrm_jobid_with_leading_comma, pmix_dyn_id, nspace); + vpid, slurm_jobid_str, size, vrm_jobid_with_leading_comma, pmix_dynamic_id, + pmix_namespace); uint32_t msg_length = strlen(info_to_send) + 1; // Ensure that little endian is used for communinication (by convention with server) @@ -436,9 +435,15 @@ int ompi_comm_finalize(void) /* Shut down MPI_COMM_SELF */ OBJ_DESTRUCT(&ompi_mpi_comm_self); + printf("FIN 7aa\n"); + fflush(stdout); + /* disconnect all dynamic communicators */ ompi_dpm_dyn_finalize(); + printf("FIN 7ab\n"); + fflush(stdout); + /* Free the attributes on comm world. 
This is not done in the * destructor as we delete attributes in ompi_comm_free (which * is not called for comm world) */ @@ -518,9 +523,15 @@ int ompi_comm_finalize(void) OBJ_DESTRUCT(&ompi_mpi_communicators); OBJ_DESTRUCT(&ompi_comm_f_to_c_table); + printf("FIN 7ac\n"); + fflush(stdout); + /* finalize communicator requests */ ompi_comm_request_fini(); + printf("FIN 7ad\n"); + fflush(stdout); + return OMPI_SUCCESS; } diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index b73e2eaa7d..36cfda4fba 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -31,44 +31,44 @@ #include "ompi_config.h" #include "ompi/constants.h" -#include <string.h> -#include <stdio.h> #include <ctype.h> +#include <stdio.h> +#include <string.h> #include <time.h> #if HAVE_SYS_TIME_H -#include <sys/time.h> +# include <sys/time.h> #endif #include <fcntl.h> +#include "opal/mca/hwloc/base/base.h" +#include "opal/mca/pmix/base/base.h" #include "opal/util/alfg.h" #include "opal/util/argv.h" -#include "opal/util/opal_getcwd.h" #include "opal/util/opal_environ.h" +#include "opal/util/opal_getcwd.h" #include "opal/util/path.h" +#include "opal/util/printf.h" #include "opal/util/proc.h" #include "opal/util/show_help.h" -#include "opal/util/printf.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/mca/pmix/base/base.h" #include "ompi/communicator/communicator.h" #include "ompi/group/group.h" -#include "ompi/proc/proc.h" +#include "ompi/info/info.h" #include "ompi/mca/pml/pml.h" +#include "ompi/proc/proc.h" #include "ompi/runtime/ompi_rte.h" -#include "ompi/info/info.h" #include "ompi/dpm/dpm.h" static opal_rng_buff_t rnd; typedef struct { - ompi_communicator_t *comm; - int size; - struct ompi_request_t **reqs; - int buf; + ompi_communicator_t *comm; + int size; + struct ompi_request_t **reqs; + int buf; } ompi_dpm_disconnect_obj; -static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs); +static int disconnect_waitall(int count, ompi_dpm_disconnect_obj **objs); static 
ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm); static int start_dvm(char **hostfiles, char **dash_host); @@ -76,9 +76,7 @@ typedef struct { opal_list_item_t super; ompi_proc_t *p; } ompi_dpm_proct_caddy_t; -static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t, - opal_list_item_t, - NULL, NULL); +static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t, opal_list_item_t, NULL, NULL); /* * Init the module @@ -95,12 +93,11 @@ int ompi_dpm_init(void) return OMPI_SUCCESS; } -int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm) +int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, const char *port_string, + bool send_first, ompi_communicator_t **newcomm) { - int k, size, rsize, rank, rc, rportlen=0; - char **members = NULL, *nstring, *rport=NULL, *key, *pkey; + int k, size, rsize, rank, rc, rportlen = 0; + char **members = NULL, *nstring, *rport = NULL, *key, *pkey; bool dense, isnew; opal_process_name_t pname; opal_list_t ilist, mlist, rlist; @@ -112,10 +109,10 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, pmix_status_t pret; opal_proclist_t *plt; - ompi_communicator_t *newcomp=MPI_COMM_NULL; + ompi_communicator_t *newcomp = MPI_COMM_NULL; ompi_proc_t *proc; - ompi_group_t *group=comm->c_local_group; - ompi_proc_t **proc_list=NULL, **new_proc_list = NULL; + ompi_group_t *group = comm->c_local_group; + ompi_proc_t **proc_list = NULL, **new_proc_list = NULL; int32_t i; ompi_group_t *new_group_pointer; ompi_dpm_proct_caddy_t *cd; @@ -123,8 +120,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, /* set default error return */ *newcomm = MPI_COMM_NULL; - size = ompi_comm_size ( comm ); - rank = ompi_comm_rank ( comm ); + size = ompi_comm_size(comm); + rank = ompi_comm_rank(comm); + + printf("ACC 1\n"); + fflush(stdout); /* the "send_first" end will append ":connect" to the port name and publish * the list of its 
participating procs on that key. The receiving root proc @@ -146,7 +146,8 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, char *value = strrchr(port_string, '='); assert(NULL != value); rportlen = atoi(++value); - if (rportlen > 0) rportlen *= -1; + if (rportlen > 0) + rportlen *= -1; goto bcast_rportlen; } @@ -159,7 +160,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, opal_argv_append_nosize(&members, nstring); free(nstring); /* add the number of procs in this job */ - (void)opal_asprintf(&nstring, "%d", size); + (void) opal_asprintf(&nstring, "%d", size); opal_argv_append_nosize(&members, nstring); free(nstring); } else { @@ -167,10 +168,9 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, proc_list = group->grp_proc_pointers; dense = true; } else { - proc_list = (ompi_proc_t**)calloc(group->grp_proc_count, - sizeof(ompi_proc_t *)); - for (i=0 ; i<group->grp_proc_count ; i++) { - if (NULL == (proc_list[i] = ompi_group_peer_lookup(group,i))) { + proc_list = (ompi_proc_t **) calloc(group->grp_proc_count, sizeof(ompi_proc_t *)); + for (i = 0; i < group->grp_proc_count; i++) { + if (NULL == (proc_list[i] = ompi_group_peer_lookup(group, i))) { OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); rc = OMPI_ERR_NOT_FOUND; free(proc_list); @@ -179,10 +179,10 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, } dense = false; } - for (i=0; i < size; i++) { + for (i = 0; i < size; i++) { opal_process_name_t proc_name; - if (ompi_proc_is_sentinel (proc_list[i])) { - proc_name = ompi_proc_sentinel_to_name ((uintptr_t) proc_list[i]); + if (ompi_proc_is_sentinel(proc_list[i])) { + proc_name = ompi_proc_sentinel_to_name((uintptr_t) proc_list[i]); } else { proc_name = proc_list[i]->super.proc_name; } @@ -196,15 +196,19 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, proc_list = NULL; } } + printf("ACC 2\n"); + fflush(stdout); if (rank == root) { + printf("ROOT: %d\n", root); + fflush(stdout); /* the 
roots for each side exchange their list of participants */ if (send_first) { - (void)opal_asprintf(&key, "%s:connect", port_string); - (void)opal_asprintf(&pkey, "%s:accept", port_string); + (void) opal_asprintf(&key, "%s:connect", port_string); + (void) opal_asprintf(&pkey, "%s:accept", port_string); } else { - (void)opal_asprintf(&key, "%s:accept", port_string); - (void)opal_asprintf(&pkey, "%s:connect", port_string); + (void) opal_asprintf(&key, "%s:accept", port_string); + (void) opal_asprintf(&pkey, "%s:connect", port_string); } nstring = opal_argv_join(members, ':'); PMIX_INFO_LOAD(&info, key, nstring, PMIX_STRING); @@ -213,7 +217,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, free(key); free(pkey); - rc = opal_pmix_base_exchange(&info, &pdat, 600); // give them 10 minutes + rc = opal_pmix_base_exchange(&info, &pdat, 600); // give them 10 minutes PMIX_INFO_DESTRUCT(&info); if (OPAL_SUCCESS != rc) { PMIX_PDATA_DESTRUCT(&pdat); @@ -221,10 +225,12 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, } /* save the result */ - rport = strdup(pdat.value.data.string); // need this later - rportlen = strlen(rport) + 1; // retain the NULL terminator + rport = strdup(pdat.value.data.string); // need this later + rportlen = strlen(rport) + 1; // retain the NULL terminator PMIX_PDATA_DESTRUCT(&pdat); } + printf("ACC 3\n"); + fflush(stdout); bcast_rportlen: /* if we aren't in a comm_spawn, the non-root members won't have @@ -233,12 +239,14 @@ bcast_rportlen: /* bcast the list-length to all processes in the local comm */ rc = comm->c_coll->coll_bcast(&rportlen, 1, MPI_INT, root, comm, - comm->c_coll->coll_bcast_module); + comm->c_coll->coll_bcast_module); if (OMPI_SUCCESS != rc) { free(rport); goto exit; } + printf("ACC 4\n"); + fflush(stdout); /* This is the comm_spawn error case: the root couldn't do the pmix spawn * and is now propagating to the local group that this operation has to * fail. 
*/ @@ -249,7 +257,7 @@ bcast_rportlen: if (rank != root) { /* non root processes need to allocate the buffer manually */ - rport = (char*)malloc(rportlen); + rport = (char *) malloc(rportlen); if (NULL == rport) { rc = OMPI_ERR_OUT_OF_RESOURCE; goto exit; @@ -257,16 +265,18 @@ bcast_rportlen: } /* now share the list of remote participants */ rc = comm->c_coll->coll_bcast(rport, rportlen, MPI_BYTE, root, comm, - comm->c_coll->coll_bcast_module); + comm->c_coll->coll_bcast_module); if (OMPI_SUCCESS != rc) { free(rport); goto exit; } + printf("ACC 5\n"); + fflush(stdout); /* initiate a list of participants for the connect, * starting with our own members */ OBJ_CONSTRUCT(&mlist, opal_list_t); - for (i=0; NULL != members[i]; i++) { + for (i = 0; NULL != members[i]; i++) { OPAL_PMIX_CONVERT_STRING_TO_PROCT(&pxproc, members[i]); plt = OBJ_NEW(opal_proclist_t); memcpy(&plt->procid, &pxproc, sizeof(pmix_proc_t)); @@ -290,7 +300,7 @@ bcast_rportlen: OBJ_CONSTRUCT(&ilist, opal_list_t); OBJ_CONSTRUCT(&rlist, opal_list_t); - for (i=0; NULL != members[i]; i++) { + for (i = 0; NULL != members[i]; i++) { OPAL_PMIX_CONVERT_STRING_TO_PROCT(&pxproc, members[i]); plt = OBJ_NEW(opal_proclist_t); memcpy(&plt->procid, &pxproc, sizeof(pmix_proc_t)); @@ -300,7 +310,7 @@ bcast_rportlen: /* if the rank is wildcard, then we are including all ranks * of that job, and the next entry in members should be the * number of procs in the job */ - if (NULL == members[i+1]) { + if (NULL == members[i + 1]) { /* just protect against the error */ OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM); opal_argv_free(members); @@ -310,9 +320,9 @@ bcast_rportlen: rc = OMPI_ERR_BAD_PARAM; goto exit; } - rsize = strtoul(members[i+1], NULL, 10); + rsize = strtoul(members[i + 1], NULL, 10); ++i; - for (k=0; k < rsize; k++) { + for (k = 0; k < rsize; k++) { pxproc.rank = k; OPAL_PMIX_CONVERT_PROCT(rc, &pname, &pxproc); if (OPAL_SUCCESS != rc) { @@ -359,24 +369,32 @@ bcast_rportlen: } } opal_argv_free(members); + printf("ACC 6\n"); 
+ fflush(stdout); /* convert the list of members to a pmix_proc_t array */ nprocs = opal_list_get_size(&mlist); PMIX_PROC_CREATE(procs, nprocs); n = 0; - OPAL_LIST_FOREACH(plt, &mlist, opal_proclist_t) { + OPAL_LIST_FOREACH (plt, &mlist, opal_proclist_t) { memcpy(&procs[n], &plt->procid, sizeof(pmix_proc_t)); ++n; } OPAL_LIST_DESTRUCT(&mlist); + printf("ACC 7\n"); + fflush(stdout); /* tell the host RTE to connect us - this will download * all known data for the nspace's of participating procs * so that add_procs will not result in a slew of lookups */ pret = PMIx_Connect(procs, nprocs, NULL, 0); + printf("ACC 8\n"); + fflush(stdout); PMIX_PROC_FREE(procs, nprocs); rc = opal_pmix_convert_status(pret); if (OPAL_SUCCESS != rc) { + printf("ACC 8 fail\n"); + fflush(stdout); OMPI_ERROR_LOG(rc); OPAL_LIST_DESTRUCT(&ilist); OPAL_LIST_DESTRUCT(&rlist); @@ -389,21 +407,21 @@ bcast_rportlen: uint16_t u16; opal_process_name_t wildcard_rank; /* convert the list of new procs to a proc_t array */ - new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist), - sizeof(ompi_proc_t *)); + new_proc_list = (ompi_proc_t **) calloc(opal_list_get_size(&ilist), sizeof(ompi_proc_t *)); /* get the list of local peers for the new procs */ - cd = (ompi_dpm_proct_caddy_t*)opal_list_get_first(&ilist); + cd = (ompi_dpm_proct_caddy_t *) opal_list_get_first(&ilist); proc = cd->p; wildcard_rank.jobid = proc->super.proc_name.jobid; wildcard_rank.vpid = OMPI_NAME_WILDCARD->vpid; /* retrieve the local peers */ - OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS, - &wildcard_rank, &val, PMIX_STRING); + printf("ACC 8 local peers\n"); + fflush(stdout); + OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCAL_PEERS, &wildcard_rank, &val, PMIX_STRING); if (OPAL_SUCCESS == rc && NULL != val) { char **peers = opal_argv_split(val, ','); free(val); nprn = opal_argv_count(peers); - peer_ranks = (uint32_t*)calloc(nprn, sizeof(uint32_t)); + peer_ranks = (uint32_t *) calloc(nprn, sizeof(uint32_t)); for (prn = 0; 
NULL != peers[prn]; prn++) { peer_ranks[prn] = strtoul(peers[prn], NULL, 10); } @@ -411,23 +429,28 @@ bcast_rportlen: } i = 0; - OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) { + printf("ACC 8 start foreach\n"); + fflush(stdout); + OPAL_LIST_FOREACH (cd, &ilist, ompi_dpm_proct_caddy_t) { proc = cd->p; - new_proc_list[i] = proc ; + new_proc_list[i] = proc; /* ompi_proc_complete_init_single() initializes and optionally retrieves * OPAL_PMIX_LOCALITY and OPAL_PMIX_HOSTNAME. since we can live without * them, we are just fine */ + printf("ACC 8 wait init single\n"); + fflush(stdout); ompi_proc_complete_init_single(proc); /* if this proc is local, then get its locality */ if (NULL != peer_ranks) { - for (prn=0; prn < nprn; prn++) { + for (prn = 0; prn < nprn; prn++) { if (peer_ranks[prn] == proc->super.proc_name.vpid) { /* get their locality string */ val = NULL; OPAL_MODEX_RECV_VALUE_IMMEDIATE(rc, PMIX_LOCALITY_STRING, - &proc->super.proc_name, &val, PMIX_STRING); + &proc->super.proc_name, &val, PMIX_STRING); if (OPAL_SUCCESS == rc && NULL != ompi_process_info.locality) { - u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, val); + u16 = opal_hwloc_compute_relative_locality(ompi_process_info.locality, + val); free(val); } else { /* all we can say is that it shares our node */ @@ -443,90 +466,113 @@ bcast_rportlen: } } } + + printf("ACC 8 done\n"); + fflush(stdout); ++i; } if (NULL != peer_ranks) { free(peer_ranks); } /* call add_procs on the new ones */ + printf("ACC 8 add procs\n"); + fflush(stdout); rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist))); + printf("ACC 8 done add procs\n"); + fflush(stdout); free(new_proc_list); new_proc_list = NULL; if (OMPI_SUCCESS != rc) { + + printf("ACC 8 error add procs\n"); + fflush(stdout); OMPI_ERROR_LOG(rc); OPAL_LIST_DESTRUCT(&ilist); goto exit; } } OPAL_LIST_DESTRUCT(&ilist); + printf("ACC 9\n"); + fflush(stdout); /* now deal with the remote group */ rsize = 
opal_list_get_size(&rlist); - new_group_pointer=ompi_group_allocate(rsize); + new_group_pointer = ompi_group_allocate(rsize); if (NULL == new_group_pointer) { rc = OMPI_ERR_OUT_OF_RESOURCE; OPAL_LIST_DESTRUCT(&rlist); goto exit; } /* assign group elements */ - i=0; - OPAL_LIST_FOREACH(cd, &rlist, ompi_dpm_proct_caddy_t) { + i = 0; + OPAL_LIST_FOREACH (cd, &rlist, ompi_dpm_proct_caddy_t) { new_group_pointer->grp_proc_pointers[i++] = cd->p; /* retain the proc */ OBJ_RETAIN(cd->p); } OPAL_LIST_DESTRUCT(&rlist); + printf("ACC 10\n"); + fflush(stdout); /* set up communicator structure */ - rc = ompi_comm_set ( &newcomp, /* new comm */ - comm, /* old comm */ - group->grp_proc_count, /* local_size */ - NULL, /* local_procs */ - rsize, /* remote_size */ - NULL , /* remote_procs */ - NULL, /* attrs */ - comm->error_handler, /* error handler */ - NULL, /* topo component */ - group, /* local group */ - new_group_pointer /* remote group */ - ); + rc = ompi_comm_set(&newcomp, /* new comm */ + comm, /* old comm */ + group->grp_proc_count, /* local_size */ + NULL, /* local_procs */ + rsize, /* remote_size */ + NULL, /* remote_procs */ + NULL, /* attrs */ + comm->error_handler, /* error handler */ + NULL, /* topo component */ + group, /* local group */ + new_group_pointer /* remote group */ + ); if (OMPI_SUCCESS != rc) { goto exit; } + printf("ACC 11\n"); + fflush(stdout); OBJ_RELEASE(new_group_pointer); new_group_pointer = MPI_GROUP_NULL; /* allocate comm_cid */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - (void*)port_string, /* rendezvous point */ - send_first, /* send or recv first */ - OMPI_COMM_CID_INTRA_PMIX); /* mode */ + rc = ompi_comm_nextcid(newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + (void *) port_string, /* rendezvous point */ + send_first, /* send or recv first */ + OMPI_COMM_CID_INTRA_PMIX); /* 
mode */ if (OMPI_SUCCESS != rc) { goto exit; } + printf("ACC 12\n"); + fflush(stdout); /* activate comm and init coll-component */ - rc = ompi_comm_activate ( &newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - (void*)port_string, /* rendezvous point */ - send_first, /* send or recv first */ - OMPI_COMM_CID_INTRA_PMIX); /* mode */ + rc = ompi_comm_activate(&newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + (void *) port_string, /* rendezvous point */ + send_first, /* send or recv first */ + OMPI_COMM_CID_INTRA_PMIX); /* mode */ if (OMPI_SUCCESS != rc) { goto exit; } + printf("ACC 13\n"); + fflush(stdout); + /* Question: do we have to re-start some low level stuff to enable the usage of fast communication devices between the two worlds ? */ - exit: +exit: + printf("ACC exit\n"); + fflush(stdout); if (OMPI_SUCCESS != rc) { if (MPI_COMM_NULL != newcomp && NULL != newcomp) { OBJ_RELEASE(newcomp); @@ -545,7 +591,7 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers) ompi_proc_t *proct; opal_process_name_t proc_name; - for (i=0; i < group->grp_proc_count; i++) { + for (i = 0; i < group->grp_proc_count; i++) { if (OMPI_GROUP_IS_DENSE(group)) { proct = group->grp_proc_pointers[i]; } else { @@ -555,8 +601,8 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers) OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); return OMPI_ERR_NOT_FOUND; } - if (ompi_proc_is_sentinel (proct)) { - proc_name = ompi_proc_sentinel_to_name ((uintptr_t)proct); + if (ompi_proc_is_sentinel(proct)) { + proc_name = ompi_proc_sentinel_to_name((uintptr_t) proct); } else { proc_name = proct->super.proc_name; } @@ -566,7 +612,7 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers) nm->name = proc_name; /* need to maintain an ordered list to ensure the tracker signatures * match across all procs */ - OPAL_LIST_FOREACH(n2, peers, 
opal_namelist_t) { + OPAL_LIST_FOREACH (n2, peers, opal_namelist_t) { if (opal_compare_proc(nm->name, n2->name) < 0) { opal_list_insert_pos(peers, &n2->super, &nm->super); nm = NULL; @@ -616,7 +662,7 @@ int ompi_dpm_disconnect(ompi_communicator_t *comm) nprocs = opal_list_get_size(&coll); PMIX_PROC_CREATE(procs, nprocs); n = 0; - OPAL_LIST_FOREACH(nm, &coll, opal_namelist_t) { + OPAL_LIST_FOREACH (nm, &coll, opal_namelist_t) { OPAL_PMIX_CONVERT_NAME(&procs[n], &nm->name); ++n; } @@ -640,28 +686,24 @@ typedef struct { char **conflicts; } dpm_conflicts_t; -static dpm_conflicts_t mapby_modifiers[] = { - {.name = "oversubscribe", .conflicts = (char *[]){"nooversubscribe", NULL}}, - {.name = "nooversubscribe", .conflicts = (char *[]){"oversubscribe", NULL}}, - {.name = ""} -}; +static dpm_conflicts_t mapby_modifiers[] = {{.name = "oversubscribe", + .conflicts = (char *[]){"nooversubscribe", NULL}}, + {.name = "nooversubscribe", + .conflicts = (char *[]){"oversubscribe", NULL}}, + {.name = ""}}; -static dpm_conflicts_t rankby_modifiers[] = { - {.name = ""} -}; +static dpm_conflicts_t rankby_modifiers[] = {{.name = ""}}; -static dpm_conflicts_t bindto_modifiers[] = { - {.name = ""} -}; +static dpm_conflicts_t bindto_modifiers[] = {{.name = ""}}; static int check_modifiers(const char *modifier, char **checks, dpm_conflicts_t *conflicts) { int n, m, k; - for (n=0; 0 != strlen(conflicts[n].name); n++) { + for (n = 0; 0 != strlen(conflicts[n].name); n++) { if (0 == strcasecmp(conflicts[n].name, modifier)) { - for (m=0; NULL != checks[m]; m++) { - for (k=0; NULL != conflicts[n].conflicts[k]; k++) { + for (m = 0; NULL != checks[m]; m++) { + for (k = 0; NULL != conflicts[n].conflicts[k]; k++) { if (0 == strcasecmp(checks[m], conflicts[n].conflicts[k])) { return OMPI_ERR_BAD_PARAM; } @@ -673,12 +715,8 @@ static int check_modifiers(const char *modifier, char **checks, dpm_conflicts_t return OMPI_SUCCESS; } -static int dpm_convert(opal_list_t *infos, - const char *infokey, - const 
char *option, - const char *directive, - const char *modifier, - bool deprecated) +static int dpm_convert(opal_list_t *infos, const char *infokey, const char *option, + const char *directive, const char *modifier, bool deprecated) { opal_info_item_t *iptr; char *ck, *ptr, *help_str = NULL; @@ -695,23 +733,23 @@ static int dpm_convert(opal_list_t *infos, modifiers = rankby_modifiers; } else if (0 == strcmp(option, PMIX_BINDTO)) { modifiers = bindto_modifiers; - } else { + } else { return OMPI_ERR_BAD_PARAM; } } /* does the matching option already exist? */ - OPAL_LIST_FOREACH(iptr, infos, opal_info_item_t) { + OPAL_LIST_FOREACH (iptr, infos, opal_info_item_t) { if (PMIX_CHECK_KEY(&iptr->info, option)) { ck = strdup(iptr->info.value.data.string); if (NULL != (ptr = strchr(ck, ':'))) { *ptr = '\0'; ++ptr; } - /* were we given a directive? */ + /* were we given a directive? */ if (NULL != directive) { /* does it conflict? */ - if (0 != strncasecmp(ck, directive, strlen(directive))) { + if (0 != strncasecmp(ck, directive, strlen(directive))) { opal_asprintf(&help_str, "Conflicting directives \"%s %s\"", ck, directive); #if PMIX_NUMERIC_VERSION >= 0x00040000 /* TODO: remove strdup if PMIx_Get_attribute_string takes const char* */ @@ -721,8 +759,8 @@ static int dpm_convert(opal_list_t *infos, #else attr = option; #endif - opal_show_help("help-dpm.txt", "deprecated-fail", true, - infokey, attr, help_str); + opal_show_help("help-dpm.txt", "deprecated-fail", true, infokey, attr, + help_str); free(help_str); free(ck); return OMPI_ERR_BAD_PARAM; @@ -746,7 +784,8 @@ static int dpm_convert(opal_list_t *infos, opal_argv_free(tmp); if (OMPI_SUCCESS != rc) { /* we have a conflict */ - opal_asprintf(&ptr, " Option %s\n Conflicting modifiers \"%s %s\"", option, infokey, modifier); + opal_asprintf(&ptr, " Option %s\n Conflicting modifiers \"%s %s\"", + option, infokey, modifier); #if PMIX_NUMERIC_VERSION >= 0x00040000 /* TODO: remove strdup if PMIx_Get_attribute_string takes const 
char* */ char *option_dup = strdup(option); @@ -755,8 +794,7 @@ static int dpm_convert(opal_list_t *infos, #else attr = option; #endif - opal_show_help("help-dpm.txt", "deprecated-fail", true, - infokey, attr, ptr); + opal_show_help("help-dpm.txt", "deprecated-fail", true, infokey, attr, ptr); free(ptr); free(ck); return OMPI_ERR_BAD_PARAM; @@ -766,8 +804,8 @@ static int dpm_convert(opal_list_t *infos, free(iptr->info.value.data.string); iptr->info.value.data.string = ptr; free(ck); - opal_show_help("help-dpm.txt", "deprecated-converted", true, - infokey, iptr->info.value.data.string); + opal_show_help("help-dpm.txt", "deprecated-converted", true, infokey, + iptr->info.value.data.string); return OMPI_SUCCESS; } } @@ -790,10 +828,9 @@ static int dpm_convert(opal_list_t *infos, opal_list_append(infos, &iptr->super); /* alert them */ - if(deprecated) { + if (deprecated) { opal_asprintf(&help_str, "Key: %s Value: %s", option, ptr); - opal_show_help("help-dpm.txt", "deprecated-converted", true, - infokey, help_str); + opal_show_help("help-dpm.txt", "deprecated-converted", true, infokey, help_str); } free(help_str); free(ptr); @@ -801,16 +838,13 @@ static int dpm_convert(opal_list_t *infos, return OMPI_SUCCESS; } - -int ompi_dpm_spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], +int ompi_dpm_spawn(int count, const char *array_of_commands[], char **array_of_argv[], + const int array_of_maxprocs[], const MPI_Info array_of_info[], const char *port_name) { int rc, i, j; - int have_wdir=0; - int flag=0; + int have_wdir = 0; + int flag = 0; opal_cstring_t *info_str; uint32_t ui32; bool personality = false; @@ -885,9 +919,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], app->maxprocs = array_of_maxprocs[i]; /* copy over the argv array */ - if (MPI_ARGVS_NULL != array_of_argv && - MPI_ARGV_NULL != array_of_argv[i]) { - for (j=0; NULL != array_of_argv[i][j]; j++) { + if 
(MPI_ARGVS_NULL != array_of_argv && MPI_ARGV_NULL != array_of_argv[i]) { + for (j = 0; NULL != array_of_argv[i][j]; j++) { opal_argv_append_nosize(&app->argv, array_of_argv[i][j]); } } @@ -904,21 +937,21 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* Check for well-known info keys */ have_wdir = 0; - if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { + if (array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL) { /* check for personality - this is a job-level key */ - ompi_info_get (array_of_info[i], "personality", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "personality", &info_str, &flag); + if (flag) { /* deprecate --> PMIX_PERSONALITY */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "personality", "PMIX_PERSONALITY"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "personality", + "PMIX_PERSONALITY"); personality = true; info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PERSONALITY, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); OBJ_RELEASE(info_str); } - ompi_info_get (array_of_info[i], "PMIX_PERSONALITY", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_PERSONALITY", &info_str, &flag); + if (flag) { personality = true; info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PERSONALITY, info_str->string, PMIX_STRING); @@ -927,8 +960,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_PERSONALITY"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { personality = true; info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PERSONALITY, info_str->string, PMIX_STRING); @@ -941,16 +974,16 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], * MPI standard 
ch. 10.3.4 */ /* check for 'host' */ - ompi_info_get (array_of_info[i], "host", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "host", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_HOST, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); opal_argv_append_nosize(&dash_host, info_str->string); OBJ_RELEASE(info_str); } - ompi_info_get (array_of_info[i], "PMIX_HOST", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_HOST", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_HOST, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -959,8 +992,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_HOST"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_HOST, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -970,8 +1003,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #endif /* check for 'wdir' */ - ompi_info_get (array_of_info[i], "wdir", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "wdir", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_WDIR, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -979,8 +1012,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], have_wdir = 1; } if (!have_wdir) { - ompi_info_get (array_of_info[i], "PMIX_WDIR", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_WDIR", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_WDIR, info_str->string, 
PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -991,8 +1024,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #if PMIX_NUMERIC_VERSION >= 0x00040000 if (!have_wdir) { checkkey = PMIx_Get_attribute_string("PMIX_WDIR"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_WDIR, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -1003,8 +1036,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #endif /* check for 'mpi_initial_errhandler' */ - ompi_info_get (array_of_info[i], "mpi_initial_errhandler", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "mpi_initial_errhandler", &info_str, &flag); + if (flag) { /* this is set as an environment because it must be available * before pmix_init */ opal_setenv("OMPI_MCA_mpi_initial_errhandler", info_str->string, true, &app->env); @@ -1021,16 +1054,16 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], * deprecated in the non-prefixed form */ /* check for 'hostfile' */ - ompi_info_get (array_of_info[i], "hostfile", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "hostfile", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_HOSTFILE, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); opal_argv_append_nosize(&hostfiles, info_str->string); OBJ_RELEASE(info_str); } - ompi_info_get (array_of_info[i], "PMIX_HOSTFILE", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_HOSTFILE", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_HOSTFILE, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -1039,8 +1072,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], 
} #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_HOSTFILE"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_HOSTFILE, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -1050,18 +1083,18 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #endif /* check for 'add-hostfile' */ - ompi_info_get (array_of_info[i], "add-hostfile", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "add-hostfile", &info_str, &flag); + if (flag) { /* deprecate --> PMIX_ADD_HOSTFILE */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "add-hostfile", "PMIX_ADD_HOSTFILE"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "add-hostfile", + "PMIX_ADD_HOSTFILE"); info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOSTFILE, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); OBJ_RELEASE(info_str); } - ompi_info_get (array_of_info[i], "PMIX_ADD_HOSTFILE", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_ADD_HOSTFILE", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOSTFILE, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -1069,8 +1102,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_ADD_HOSTFILE"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOSTFILE, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -1079,18 +1112,18 @@ int 
ompi_dpm_spawn(int count, const char *array_of_commands[], #endif /* check for 'add-host' */ - ompi_info_get (array_of_info[i], "add-host", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "add-host", &info_str, &flag); + if (flag) { /* deprecate --> PMIX_ADD_HOST */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "add-host", "PMIX_ADD_HOST"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "add-host", + "PMIX_ADD_HOST"); info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOST, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); OBJ_RELEASE(info_str); } - ompi_info_get (array_of_info[i], "PMIX_ADD_HOST", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_ADD_HOST", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOST, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -1098,8 +1131,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_ADD_HOST"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_ADD_HOST, info_str->string, PMIX_STRING); opal_list_append(&app_info, &info->super); @@ -1108,34 +1141,33 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #endif /* check for env */ - ompi_info_get (array_of_info[i], "env", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "env", &info_str, &flag); + if (flag) { /* deprecate --> PMIX_ENVAR */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "env", "PMIX_ENVAR"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "env", "PMIX_ENVAR"); envars = opal_argv_split(info_str->string, '\n'); 
OBJ_RELEASE(info_str); - for (j=0; NULL != envars[j]; j++) { + for (j = 0; NULL != envars[j]; j++) { opal_argv_append_nosize(&app->env, envars[j]); } opal_argv_free(envars); } - ompi_info_get (array_of_info[i], "PMIX_ENVAR", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_ENVAR", &info_str, &flag); + if (flag) { envars = opal_argv_split(info_str->string, '\n'); OBJ_RELEASE(info_str); - for (j=0; NULL != envars[j]; j++) { + for (j = 0; NULL != envars[j]; j++) { opal_argv_append_nosize(&app->env, envars[j]); } opal_argv_free(envars); } #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_ENVAR"); - ompi_info_get (array_of_info[i], "PMIX_ENVAR", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_ENVAR", &info_str, &flag); + if (flag) { envars = opal_argv_split(info_str->string, '\n'); OBJ_RELEASE(info_str); - for (j=0; NULL != envars[j]; j++) { + for (j = 0; NULL != envars[j]; j++) { opal_argv_append_nosize(&app->env, envars[j]); } opal_argv_free(envars); @@ -1147,18 +1179,18 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], * * This is a job-level key */ - ompi_info_get (array_of_info[i], "ompi_prefix", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "ompi_prefix", &info_str, &flag); + if (flag) { /* deprecate --> PMIX_PREFIX */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "ompi_prefix", "PMIX_PREFIX"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_prefix", + "PMIX_PREFIX"); info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PREFIX, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); OBJ_RELEASE(info_str); } - ompi_info_get (array_of_info[i], "PMIX_PREFIX", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_PREFIX", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PREFIX, info_str->string, 
PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1166,8 +1198,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_PREFIX"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PREFIX, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1177,17 +1209,17 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* check for 'mapper' - a job-level key */ ompi_info_get(array_of_info[i], "mapper", &info_str, &flag); - if ( flag ) { + if (flag) { /* deprecate --> PMIX_MAPPER */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "mapper", "PMIX_MAPPER"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "mapper", + "PMIX_MAPPER"); info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_MAPPER, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); OBJ_RELEASE(info_str); } ompi_info_get(array_of_info[i], "PMIX_MAPPER", &info_str, &flag); - if ( flag ) { + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_MAPPER, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1196,7 +1228,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_MAPPER"); ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_MAPPER, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1206,7 +1238,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* check for 'display_map' - a job-level key */ ompi_info_get_bool(array_of_info[i], 
"display_map", &local_spawn, &flag); - if ( flag ) { + if (flag) { rc = dpm_convert(&job_info, "display_map", PMIX_MAPBY, NULL, "DISPLAYMAP", true); if (OMPI_SUCCESS != rc) { OPAL_LIST_DESTRUCT(&job_info); @@ -1224,8 +1256,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } /* check for 'npernode' and 'ppr' - job-level key */ - ompi_info_get (array_of_info[i], "npernode", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "npernode", &info_str, &flag); + if (flag) { opal_asprintf(&tmp, "PPR:%s:NODE", info_str->string); rc = dpm_convert(&job_info, "npernode", PMIX_MAPBY, tmp, NULL, true); free(tmp); @@ -1244,8 +1276,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], return MPI_ERR_SPAWN; } } - ompi_info_get (array_of_info[i], "pernode", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "pernode", &info_str, &flag); + if (flag) { rc = dpm_convert(&job_info, "pernode", PMIX_MAPBY, "PPR:1:NODE", NULL, true); OBJ_RELEASE(info_str); if (OMPI_SUCCESS != rc) { @@ -1262,8 +1294,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], return MPI_ERR_SPAWN; } } - ompi_info_get (array_of_info[i], "ppr", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "ppr", &info_str, &flag); + if (flag) { /* must have correct syntax with two colons */ if (NULL == (tmp = strchr(info_str->string, ':'))) { opal_show_help("help-dpm.txt", "bad-ppr", true, info_str->string); @@ -1315,7 +1347,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* check for 'map_by' - job-level key */ ompi_info_get(array_of_info[i], "map_by", &info_str, &flag); - if ( flag ) { + if (flag) { rc = dpm_convert(&job_info, "map_by", PMIX_MAPBY, info_str->string, NULL, false); OBJ_RELEASE(info_str); if (OMPI_SUCCESS != rc) { @@ -1333,7 +1365,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } } ompi_info_get(array_of_info[i], "PMIX_MAPBY", &info_str, &flag); - if ( flag ) { + if (flag) { 
info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_MAPBY, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1342,7 +1374,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_MAPBY"); ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_MAPBY, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1352,7 +1384,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* check for 'rank_by' - job-level key */ ompi_info_get(array_of_info[i], "rank_by", &info_str, &flag); - if ( flag ) { + if (flag) { rc = dpm_convert(&job_info, "rank_by", PMIX_RANKBY, info_str->string, NULL, false); OBJ_RELEASE(info_str); if (OMPI_SUCCESS != rc) { @@ -1364,7 +1396,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } } ompi_info_get(array_of_info[i], "PMIX_RANKBY", &info_str, &flag); - if ( flag ) { + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_RANKBY, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1373,7 +1405,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_RANKBY"); ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_RANKBY, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1383,7 +1415,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* check for 'bind_to' - job-level key */ ompi_info_get(array_of_info[i], "bind_to", &info_str, &flag); - if ( flag ) { + if (flag) { rc = dpm_convert(&job_info, "bind_to", PMIX_BINDTO, info_str->string, NULL, false); OBJ_RELEASE(info_str); if 
(OMPI_SUCCESS != rc) { @@ -1395,7 +1427,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } } ompi_info_get(array_of_info[i], "PMIX_BINDTO", &info_str, &flag); - if ( flag ) { + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_BINDTO, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1404,7 +1436,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_BINDTO"); ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_BINDTO, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1414,16 +1446,16 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* check for 'preload_binary' - job-level key */ ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); - if ( flag ) { + if (flag) { /* deprecate --> PMIX_PRELOAD_BIN */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "ompi_preload_binary", "PMIX_PRELOAD_BIN"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_preload_binary", + "PMIX_PRELOAD_BIN"); info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_BIN, &local_spawn, PMIX_BOOL); opal_list_append(&job_info, &info->super); } ompi_info_get_bool(array_of_info[i], "PMIX_PRELOAD_BIN", &local_spawn, &flag); - if ( flag ) { + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_BIN, &local_spawn, PMIX_BOOL); opal_list_append(&job_info, &info->super); @@ -1431,7 +1463,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_PRELOAD_BIN"); ompi_info_get_bool(array_of_info[i], checkkey, &local_spawn, &flag); - if ( flag ) { + if (flag) { info = 
OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_BIN, &local_spawn, PMIX_BOOL); opal_list_append(&job_info, &info->super); @@ -1439,18 +1471,18 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], #endif /* check for 'preload_files' - job-level key */ - ompi_info_get (array_of_info[i], "ompi_preload_files", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "ompi_preload_files", &info_str, &flag); + if (flag) { /* deprecate --> PMIX_PRELOAD_FILES */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "ompi_preload_files", "PMIX_PRELOAD_FILES"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_preload_files", + "PMIX_PRELOAD_FILES"); info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_FILES, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); OBJ_RELEASE(info_str); } - ompi_info_get (array_of_info[i], "PMIX_PRELOAD_FILES", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_PRELOAD_FILES", &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_FILES, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1458,8 +1490,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], } #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_PRELOAD_FILES"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { info = OBJ_NEW(opal_info_item_t); PMIX_INFO_LOAD(&info->info, PMIX_PRELOAD_FILES, info_str->string, PMIX_STRING); opal_list_append(&job_info, &info->super); @@ -1472,15 +1504,15 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], */ ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); if (flag && non_mpi) { - opal_show_help("help-dpm.txt", "deprecated-inform", true, - 
"ompi_non_mpi", "No longer relevant as RTE automatically detects this scenario"); + opal_show_help("help-dpm.txt", "deprecated-inform", true, "ompi_non_mpi", + "No longer relevant as RTE automatically detects this scenario"); } /* see if this is an MCA param that the user wants applied to the child job */ - ompi_info_get (array_of_info[i], "ompi_param", &info_str, &flag); - if ( flag ) { - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "ompi_param", "PMIX_ENVAR"); + ompi_info_get(array_of_info[i], "ompi_param", &info_str, &flag); + if (flag) { + opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_param", + "PMIX_ENVAR"); opal_argv_append_unique_nosize(&app->env, info_str->string, true); OBJ_RELEASE(info_str); } @@ -1488,11 +1520,11 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* see if user specified what to do with stdin - defaults to * not forwarding stdin to child processes - job-level key */ - ompi_info_get (array_of_info[i], "ompi_stdin_target", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "ompi_stdin_target", &info_str, &flag); + if (flag) { /* deprecate --> PMIX_STDIN_TGT */ - opal_show_help("help-dpm.txt", "deprecated-converted", true, - "ompi_stdin_target", "PMIX_STDIN_TGT"); + opal_show_help("help-dpm.txt", "deprecated-converted", true, "ompi_stdin_target", + "PMIX_STDIN_TGT"); if (0 == strcmp(info_str->string, "all")) { ui32 = OPAL_VPID_WILDCARD; } else if (0 == strcmp(info_str->string, "none")) { @@ -1505,8 +1537,8 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], opal_list_append(&job_info, &info->super); OBJ_RELEASE(info_str); } - ompi_info_get (array_of_info[i], "PMIX_STDIN_TGT", &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], "PMIX_STDIN_TGT", &info_str, &flag); + if (flag) { if (0 == strcmp(info_str->string, "all")) { ui32 = OPAL_VPID_WILDCARD; } else if (0 == strcmp(info_str->string, "none")) { @@ -1521,8 +1553,8 @@ int 
ompi_dpm_spawn(int count, const char *array_of_commands[], } #if PMIX_NUMERIC_VERSION >= 0x00040000 checkkey = PMIx_Get_attribute_string("PMIX_STDIN_TGT"); - ompi_info_get (array_of_info[i], checkkey, &info_str, &flag); - if ( flag ) { + ompi_info_get(array_of_info[i], checkkey, &info_str, &flag); + if (flag) { if (0 == strcmp(info_str->string, "all")) { ui32 = OPAL_VPID_WILDCARD; } else if (0 == strcmp(info_str->string, "none")) { @@ -1541,11 +1573,11 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], /* default value: If the user did not tell us where to look for the * executable, we assume the current working directory */ - if ( !have_wdir ) { + if (!have_wdir) { char cwd[OPAL_PATH_MAX]; if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) { OMPI_ERROR_LOG(rc); - PMIX_APP_FREE(apps, (size_t)count); + PMIX_APP_FREE(apps, (size_t) count); opal_progress_event_users_decrement(); if (NULL != hostfiles) { opal_argv_free(hostfiles); @@ -1569,7 +1601,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], PMIX_INFO_CREATE(app->info, ninfo); app->ninfo = ninfo; n = 0; - OPAL_LIST_FOREACH(info, &app_info, opal_info_item_t) { + OPAL_LIST_FOREACH (info, &app_info, opal_info_item_t) { PMIX_INFO_XFER(&app->info[n], &info->info); ++n; } @@ -1589,7 +1621,7 @@ int ompi_dpm_spawn(int count, const char *array_of_commands[], if (0 < ninfo) { PMIX_INFO_CREATE(pinfo, ninfo); n = 0; - OPAL_LIST_FOREACH(info, &job_info, opal_info_item_t) { + OPAL_LIST_FOREACH (info, &job_info, opal_info_item_t) { PMIX_INFO_XFER(&pinfo[n], &info->info); ++n; } @@ -1643,7 +1675,7 @@ int ompi_dpm_open_port(char *port_name) r = opal_rand(&rnd); opal_convert_process_name_to_string(&tmp, OMPI_PROC_MY_NAME); - snprintf(port_name, MPI_MAX_PORT_NAME-1, "%s:%u", tmp, r); + snprintf(port_name, MPI_MAX_PORT_NAME - 1, "%s:%u", tmp, r); port_name[MPI_MAX_PORT_NAME - 1] = '\0'; free(tmp); return OMPI_SUCCESS; @@ -1657,10 +1689,10 @@ int ompi_dpm_close_port(const char *port_name) int 
ompi_dpm_dyn_init(void) { - int root=0, rc; + int root = 0, rc; bool send_first = true; - ompi_communicator_t *newcomm=NULL; - char *port_name=NULL, *tmp, *ptr; + ompi_communicator_t *newcomm = NULL; + char *port_name = NULL, *tmp, *ptr; /* check for appropriate env variable */ tmp = getenv("OMPI_PARENT_PORT"); @@ -1671,11 +1703,11 @@ int ompi_dpm_dyn_init(void) /* the value passed to us may have quote marks around it to protect * the value if passed on the command line. We must remove those - * to have a correct string + * to have a correct string */ - if ('"' == tmp[0]) { + if ('"' == tmp[0]) { /* if the first char is a quote, then so will the last one be */ - tmp[strlen(tmp)-1] = '\0'; + tmp[strlen(tmp) - 1] = '\0'; ptr = &tmp[1]; } else { ptr = &tmp[0]; @@ -1703,10 +1735,13 @@ int ompi_dpm_dyn_init(void) snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMM_PARENT"); newcomm->c_flags |= OMPI_COMM_NAMEISSET; + FILE *dbg_out = fopen("/home/ompi_debug", "a"); + fprintf(dbg_out, "OMPI_DPM INIT COMPLETE\n"); + fclose(dbg_out); + return OMPI_SUCCESS; } - /* * finalize the module */ @@ -1717,7 +1752,7 @@ int ompi_dpm_finalize(void) static void cleanup_dpm_disconnect_objs(ompi_dpm_disconnect_obj **objs, int count) { - for(int i = 0; i < count; i++) { + for (int i = 0; i < count; i++) { if (NULL != objs[i]->reqs) { free(objs[i]->reqs); } @@ -1733,21 +1768,24 @@ static void cleanup_dpm_disconnect_objs(ompi_dpm_disconnect_obj **objs, int coun and does the disconnect for all dynamic communicators */ int ompi_dpm_dyn_finalize(void) { - int i,j=0, max=0; - ompi_dpm_disconnect_obj **objs=NULL; - ompi_communicator_t *comm=NULL; + int i, j = 0, max = 0; + ompi_dpm_disconnect_obj **objs = NULL; + ompi_communicator_t *comm = NULL; + + if (1 < ompi_comm_num_dyncomm) { + objs = (ompi_dpm_disconnect_obj **) malloc(ompi_comm_num_dyncomm + * sizeof(ompi_dpm_disconnect_obj *)); - if (1 <ompi_comm_num_dyncomm) { - objs = (ompi_dpm_disconnect_obj**)malloc(ompi_comm_num_dyncomm * - 
sizeof(ompi_dpm_disconnect_obj*)); + printf("dc start\n"); + fflush(stdout); if (NULL == objs) { return OMPI_ERR_OUT_OF_RESOURCE; } max = opal_pointer_array_get_size(&ompi_mpi_communicators); - for (i=3; i<max; i++) { - comm = (ompi_communicator_t*)opal_pointer_array_get_item(&ompi_mpi_communicators,i); - if (NULL != comm && OMPI_COMM_IS_DYNAMIC(comm)) { + for (i = 3; i < max; i++) { + comm = (ompi_communicator_t *) opal_pointer_array_get_item(&ompi_mpi_communicators, i); + if (NULL != comm && OMPI_COMM_IS_DYNAMIC(comm)) { objs[j++] = disconnect_init(comm); } } @@ -1757,8 +1795,14 @@ int ompi_dpm_dyn_finalize(void) return OMPI_ERROR; } + printf("waitall\n"); + fflush(stdout); disconnect_waitall(ompi_comm_num_dyncomm, objs); + printf("dc done 1\n"); + fflush(stdout); } + printf("dc done all\n"); + fflush(stdout); return OMPI_SUCCESS; } @@ -1779,11 +1823,11 @@ The communicators can than be released. static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm) { - ompi_dpm_disconnect_obj *obj=NULL; + ompi_dpm_disconnect_obj *obj = NULL; int ret; int i; - obj = (ompi_dpm_disconnect_obj*)calloc(1,sizeof(ompi_dpm_disconnect_obj)); + obj = (ompi_dpm_disconnect_obj *) calloc(1, sizeof(ompi_dpm_disconnect_obj)); if (NULL == obj) { opal_output(0, "Could not allocate disconnect object"); return NULL; @@ -1796,7 +1840,7 @@ static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm) } obj->comm = comm; - obj->reqs = (ompi_request_t**)malloc(2*obj->size*sizeof(ompi_request_t *)); + obj->reqs = (ompi_request_t **) malloc(2 * obj->size * sizeof(ompi_request_t *)); if (NULL == obj->reqs) { opal_output(0, "Could not allocate request array for disconnect object"); free(obj); @@ -1805,10 +1849,9 @@ static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm) /* initiate all isend_irecvs. We use a dummy buffer stored on the object, since we are sending zero size messages anyway. 
*/ - for (i=0; i < obj->size; i++) { - ret = MCA_PML_CALL(irecv(&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, comm, - &(obj->reqs[2*i]))); + for (i = 0; i < obj->size; i++) { + ret = MCA_PML_CALL( + irecv(&(obj->buf), 0, MPI_INT, i, OMPI_COMM_BARRIER_TAG, comm, &(obj->reqs[2 * i]))); if (OMPI_SUCCESS != ret) { opal_output(0, "dpm_disconnect_init: error %d in irecv to process %d", ret, i); @@ -1816,10 +1859,8 @@ static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm) free(obj); return NULL; } - ret = MCA_PML_CALL(isend(&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, - MCA_PML_BASE_SEND_SYNCHRONOUS, - comm, &(obj->reqs[2*i+1]))); + ret = MCA_PML_CALL(isend(&(obj->buf), 0, MPI_INT, i, OMPI_COMM_BARRIER_TAG, + MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &(obj->reqs[2 * i + 1]))); if (OMPI_SUCCESS != ret) { opal_output(0, "dpm_disconnect_init: error %d in isend to process %d", ret, i); @@ -1841,16 +1882,19 @@ static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm) * - call waitall on the overall request array * - free the objects */ -static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs) +static int disconnect_waitall(int count, ompi_dpm_disconnect_obj **objs) { - ompi_request_t **reqs=NULL; - char *treq=NULL; + printf("RUN waitall\n"); + fflush(stdout); + + ompi_request_t **reqs = NULL; + char *treq = NULL; int totalcount = 0; int i; int ret; - for (i=0; i<count; i++) { + for (i = 0; i < count; i++) { if (NULL == objs[i]) { opal_output(0, "Error in comm_disconnect_waitall"); return OMPI_ERROR; @@ -1859,21 +1903,28 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs) totalcount += objs[i]->size; } - reqs = (ompi_request_t**)malloc(2*totalcount*sizeof(ompi_request_t *)); + reqs = (ompi_request_t **) malloc(2 * totalcount * sizeof(ompi_request_t *)); if (NULL == reqs) { opal_output(0, "ompi_comm_disconnect_waitall: error allocating memory"); return OMPI_ERROR; } /* generate a single, 
large array of pending requests */ - treq = (char *)reqs; - for (i=0; i<count; i++) { - memcpy(treq, objs[i]->reqs, 2*objs[i]->size * sizeof(ompi_request_t *)); - treq += 2*objs[i]->size * sizeof(ompi_request_t *); + treq = (char *) reqs; + for (i = 0; i < count; i++) { + memcpy(treq, objs[i]->reqs, 2 * objs[i]->size * sizeof(ompi_request_t *)); + treq += 2 * objs[i]->size * sizeof(ompi_request_t *); } /* force all non-blocking all-to-alls to finish */ - ret = ompi_request_wait_all(2*totalcount, reqs, MPI_STATUSES_IGNORE); + + printf("waitall request\n"); + fflush(stdout); + for (i = 0; i < 2 * totalcount; i++) { + printf("Request %d\n", (int) reqs[i]->req_type); + fflush(stdout); + } + ret = ompi_request_wait_all(2 * totalcount, reqs, MPI_STATUSES_IGNORE); /* Finally, free everything */ cleanup_dpm_disconnect_objs(objs, count); @@ -1885,12 +1936,12 @@ static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs) /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ -static bool ompi_dpm_group_is_dyn (ompi_group_t *group, ompi_jobid_t thisjobid) +static bool ompi_dpm_group_is_dyn(ompi_group_t *group, ompi_jobid_t thisjobid) { - int size = group ? ompi_group_size (group) : 0; + int size = group ? 
ompi_group_size(group) : 0; - for (int i = 0 ; i < size ; ++i) { - opal_process_name_t name = ompi_group_get_proc_name (group, i); + for (int i = 0; i < size; ++i) { + opal_process_name_t name = ompi_group_get_proc_name(group, i); if (thisjobid != ((ompi_process_name_t *) &name)->jobid) { /* at least one is different */ @@ -1916,17 +1967,17 @@ void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm) return; } - thisjobid = ompi_group_get_proc_name (comm->c_local_group, 0).jobid; + thisjobid = ompi_group_get_proc_name(comm->c_local_group, 0).jobid; /* loop over all processes in local group and check for * a different jobid */ - found = ompi_dpm_group_is_dyn (comm->c_local_group, thisjobid); + found = ompi_dpm_group_is_dyn(comm->c_local_group, thisjobid); if (!found) { /* if inter-comm, loop over all processes in remote_group * and see if any are different from thisjobid */ - found = ompi_dpm_group_is_dyn (comm->c_remote_group, thisjobid); + found = ompi_dpm_group_is_dyn(comm->c_remote_group, thisjobid); } /* if a different jobid was found, set the disconnect flag*/ @@ -1938,7 +1989,7 @@ void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm) #if OMPI_HAVE_PRRTE -#define DVM_URI_MSG_LGTH 256 +# define DVM_URI_MSG_LGTH 256 static void set_handler_default(int sig) { @@ -1948,7 +1999,7 @@ static void set_handler_default(int sig) act.sa_flags = 0; sigemptyset(&act.sa_mask); - sigaction(sig, &act, (struct sigaction *)0); + sigaction(sig, &act, (struct sigaction *) 0); } static int start_dvm(char **hostfiles, char **dash_host) @@ -1996,7 +2047,7 @@ static int start_dvm(char **hostfiles, char **dash_host) return OMPI_ERROR; } - /* we need to start the PRRTE DVM first so we can + /* we need to start the PRRTE DVM first so we can * spawn processes - see if they gave us any hostfile * or dash-host options we should pass along */ opal_argv_append_nosize(&args, "prte"); @@ -2072,25 +2123,23 @@ static int start_dvm(char **hostfiles, char **dash_host) execv(cmd, args); /* if I get 
here, the execv failed! */ - opal_show_help("help-ess-base.txt", "ess-base:execv-error", - true, cmd, strerror(errno)); + opal_show_help("help-ess-base.txt", "ess-base:execv-error", true, cmd, strerror(errno)); exit(1); - } free(cmd); /* I am the parent - wait to hear something back and * report results */ - close(p[1]); /* parent closes the write - prte will write its contact info to it*/ - close(death_pipe[0]); /* parent closes the death_pipe's read */ + close(p[1]); /* parent closes the write - prte will write its contact info to it*/ + close(death_pipe[0]); /* parent closes the death_pipe's read */ opal_argv_free(args); /* setup the buffer to read the DVM's uri */ buffer_length = DVM_URI_MSG_LGTH; - chunk = DVM_URI_MSG_LGTH-1; + chunk = DVM_URI_MSG_LGTH - 1; num_chars_read = 0; - uri = (char*)malloc(buffer_length); + uri = (char *) malloc(buffer_length); memset(uri, 0, buffer_length); while (0 != (rc = read(p[0], &uri[num_chars_read], chunk))) { @@ -2105,7 +2154,7 @@ static int start_dvm(char **hostfiles, char **dash_host) chunk -= rc; if (0 == chunk) { chunk = DVM_URI_MSG_LGTH; - uri = realloc((void*)uri, buffer_length+chunk); + uri = realloc((void *) uri, buffer_length + chunk); memset(&uri[buffer_length], 0, chunk); buffer_length += chunk; } diff --git a/ompi/mpi/c/comm_spawn.c b/ompi/mpi/c/comm_spawn.c index 777f7dd211..96f52e2c8b 100644 --- a/ompi/mpi/c/comm_spawn.c +++ b/ompi/mpi/c/comm_spawn.c @@ -27,86 +27,84 @@ #include "ompi_config.h" #include <stdio.h> -#include "opal/util/show_help.h" #include "opal/util/printf.h" +#include "opal/util/show_help.h" -#include "ompi/info/info.h" -#include "ompi/mpi/c/bindings.h" -#include "ompi/runtime/params.h" -#include "ompi/runtime/mpiruntime.h" #include "ompi/communicator/communicator.h" -#include "ompi/errhandler/errhandler.h" #include "ompi/dpm/dpm.h" +#include "ompi/errhandler/errhandler.h" +#include "ompi/info/info.h" #include "ompi/memchecker.h" +#include "ompi/mpi/c/bindings.h" +#include 
"ompi/runtime/mpiruntime.h" +#include "ompi/runtime/params.h" #if OMPI_BUILD_MPI_PROFILING -#if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPI_Comm_spawn = PMPI_Comm_spawn -#endif -#define MPI_Comm_spawn PMPI_Comm_spawn +# if OPAL_HAVE_WEAK_SYMBOLS +# pragma weak MPI_Comm_spawn = PMPI_Comm_spawn +# endif +# define MPI_Comm_spawn PMPI_Comm_spawn #endif static const char FUNC_NAME[] = "MPI_Comm_spawn"; - -int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info, - int root, MPI_Comm comm, MPI_Comm *intercomm, - int array_of_errcodes[]) +int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info, int root, + MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[]) { - int rank, rc=OMPI_SUCCESS, i, flag; + printf("MPI A\n"); + fflush(stdout); + int rank, rc = OMPI_SUCCESS, i, flag; bool send_first = false; /* we wait to be contacted */ - ompi_communicator_t *newcomp=MPI_COMM_NULL; - char port_name[MPI_MAX_PORT_NAME]; char *port_string = NULL; + ompi_communicator_t *newcomp = MPI_COMM_NULL; + char port_name[MPI_MAX_PORT_NAME]; + char *port_string = NULL; bool non_mpi = false; - MEMCHECKER( - memchecker_comm(comm); - ); + MEMCHECKER(memchecker_comm(comm);); + printf("MPI B\n"); + fflush(stdout); - if ( MPI_PARAM_CHECK ) { + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if ( ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, - FUNC_NAME); + if (ompi_comm_invalid(comm)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, FUNC_NAME); } - if ( OMPI_COMM_IS_INTER(comm)) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, - FUNC_NAME); + if (OMPI_COMM_IS_INTER(comm)) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, FUNC_NAME); } - if ( (0 > root) || (ompi_comm_size(comm) <= root) ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, - FUNC_NAME); + if ((0 > root) || (ompi_comm_size(comm) <= root)) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - if ( 
NULL == intercomm ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, - FUNC_NAME); + if (NULL == intercomm) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } #if OPAL_ENABLE_FT_MPI - if( OPAL_UNLIKELY(!ompi_comm_iface_coll_check(comm, &rc)) ) { + if (OPAL_UNLIKELY(!ompi_comm_iface_coll_check(comm, &rc))) { return OMPI_ERRHANDLER_INVOKE(comm, rc, FUNC_NAME); } #endif + printf("MPI C\n"); + fflush(stdout); - rank = ompi_comm_rank ( comm ); - if ( MPI_PARAM_CHECK ) { - if ( rank == root ) { - if ( NULL == command ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, - FUNC_NAME); + rank = ompi_comm_rank(comm); + if (MPI_PARAM_CHECK) { + if (rank == root) { + if (NULL == command) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - if ( 0 > maxprocs ) { - return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, - FUNC_NAME); + if (0 > maxprocs) { + return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } if (NULL == info || ompi_info_is_freed(info)) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, - FUNC_NAME); + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, FUNC_NAME); } } } + printf("MPI D\n"); + fflush(stdout); if (!ompi_mpi_dynamics_is_enabled(FUNC_NAME)) { return OMPI_ERRHANDLER_INVOKE(comm, OMPI_ERR_NOT_SUPPORTED, FUNC_NAME); @@ -114,21 +112,21 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf /* initialize the port name to avoid problems */ memset(port_name, 0, MPI_MAX_PORT_NAME); - printf("MPI E\n"); fflush(stdout); - + printf("MPI E\n"); + fflush(stdout); /* See if the info key "ompi_non_mpi" was set to true */ if (rank == root) { ompi_info_get_bool(info, "ompi_non_mpi", &non_mpi, &flag); } - if ( rank == root ) { + if (rank == root) { if (!non_mpi) { /* Open a port. The port_name is passed as an environment variable to the children. 
*/ printf("NON_MPI\n"); fflush(stdout); - if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) { + if (OMPI_SUCCESS != (rc = ompi_dpm_open_port(port_name))) { goto error; } } else if (1 < ompi_comm_size(comm)) { @@ -140,15 +138,16 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf } printf("SPAWN\n"); fflush(stdout); - if (OMPI_SUCCESS != (rc = ompi_dpm_spawn (1, &command, &argv, &maxprocs, - &info, port_name))) { + if (OMPI_SUCCESS + != (rc = ompi_dpm_spawn(1, &command, &argv, &maxprocs, &info, port_name))) { goto error; } - } printf("MPI F\n"); + } + printf("MPI F\n"); fflush(stdout); - -error: printf("MPI G\n"); +error: + printf("MPI G\n"); fflush(stdout); if (OMPI_SUCCESS != rc) { @@ -159,32 +158,34 @@ error: printf("MPI G\n"); * non-root ranks do not deadlock. * Add the error code to the port string for connect_accept * to propagate the error code. */ - (void)opal_asprintf(&port_string, "%s:error=%d", port_name, rc); - } - else { + (void) opal_asprintf(&port_string, "%s:error=%d", port_name, rc); + } else { port_string = port_name; } + printf("MPI H\n"); + fflush(stdout); + if (non_mpi) { newcomp = MPI_COMM_NULL; } else { - rc = ompi_dpm_connect_accept (comm, root, port_string, send_first, &newcomp); + rc = ompi_dpm_connect_accept(comm, root, port_string, send_first, &newcomp); } if (OPAL_ERR_NOT_SUPPORTED == rc) { printf("NOT SUPPORTED\n"); - opal_show_help("help-mpi-api.txt", - "MPI function not supported", - true, - FUNC_NAME, + opal_show_help("help-mpi-api.txt", "MPI function not supported", true, FUNC_NAME, "Underlying runtime environment does not support spawn functionality"); fflush(stdout); } - if(port_string != port_name) { + if (port_string != port_name) { free(port_string); } + printf("MPI I\n"); + fflush(stdout); + /* close the port */ if (rank == root && !non_mpi) { ompi_dpm_close_port(port_name); @@ -192,14 +193,14 @@ error: printf("MPI G\n"); /* set error codes */ if (MPI_ERRCODES_IGNORE != 
array_of_errcodes) { - for ( i=0; i < maxprocs; i++ ) { - array_of_errcodes[i]=rc; + for (i = 0; i < maxprocs; i++) { + array_of_errcodes[i] = rc; } } - printf("HANDLE ERROR %d", rc); - fflush(stdout); + printf("MPI J %d\n", rc); /* debug trace: report the final return code before handing it to the error handler */ + fflush(stdout); *intercomm = newcomp; - OMPI_ERRHANDLER_RETURN (rc, comm, rc, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_spawn_multiple.c b/ompi/mpi/c/comm_spawn_multiple.c index 58b36c855c..0ac117e91b 100644 --- a/ompi/mpi/c/comm_spawn_multiple.c +++ b/ompi/mpi/c/comm_spawn_multiple.c @@ -27,88 +27,86 @@ #include "ompi_config.h" #include <stdio.h> -#include "opal/util/show_help.h" #include "opal/util/printf.h" +#include "opal/util/show_help.h" -#include "ompi/mpi/c/bindings.h" -#include "ompi/runtime/params.h" -#include "ompi/runtime/mpiruntime.h" #include "ompi/communicator/communicator.h" +#include "ompi/dpm/dpm.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" +#include "ompi/mpi/c/bindings.h" +#include "ompi/runtime/mpiruntime.h" +#include "ompi/runtime/params.h" #if OMPI_BUILD_MPI_PROFILING -#if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPI_Comm_spawn_multiple = PMPI_Comm_spawn_multiple -#endif -#define MPI_Comm_spawn_multiple PMPI_Comm_spawn_multiple +# if OPAL_HAVE_WEAK_SYMBOLS +# pragma weak MPI_Comm_spawn_multiple = PMPI_Comm_spawn_multiple +# endif +# define MPI_Comm_spawn_multiple PMPI_Comm_spawn_multiple #endif static const char FUNC_NAME[] = "MPI_Comm_spawn_multiple"; - int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_of_argv[], - const int array_of_maxprocs[], const MPI_Info array_of_info[], - int root, MPI_Comm comm, MPI_Comm *intercomm, - int array_of_errcodes[]) + const int array_of_maxprocs[], const MPI_Info array_of_info[], int root, + MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[]) { - int i=0, rc=0, rank=0, size=0, flag; - ompi_communicator_t 
*newcomp=MPI_COMM_NULL; - bool send_first=false; /* they are contacting us first */ - char port_name[MPI_MAX_PORT_NAME]; char *port_string = NULL; + printf("MPI A\n"); + fflush(stdout); + int i = 0, rc = 0, rank = 0, size = 0, flag; + ompi_communicator_t *newcomp = MPI_COMM_NULL; + bool send_first = false; /* they are contacting us first */ + char port_name[MPI_MAX_PORT_NAME]; + char *port_string = NULL; bool non_mpi = false, cumulative = false; - MEMCHECKER( - memchecker_comm(comm); - ); + MEMCHECKER(memchecker_comm(comm);); - if ( MPI_PARAM_CHECK ) { + if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); - if ( ompi_comm_invalid (comm)) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, - FUNC_NAME); + if (ompi_comm_invalid(comm)) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, FUNC_NAME); } - if ( OMPI_COMM_IS_INTER(comm)) { + if (OMPI_COMM_IS_INTER(comm)) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, FUNC_NAME); } - if ( (0 > root) || (ompi_comm_size(comm) <= root) ) { + if ((0 > root) || (ompi_comm_size(comm) <= root)) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - if ( NULL == intercomm ) { + if (NULL == intercomm) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } - rank = ompi_comm_rank ( comm ); - if ( MPI_PARAM_CHECK ) { - if ( rank == root ) { - if ( 0 > count ) { + printf("MPI B\n"); + fflush(stdout); + + rank = ompi_comm_rank(comm); + if (MPI_PARAM_CHECK) { + if (rank == root) { + if (0 > count) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - if ( NULL == array_of_commands ) { + if (NULL == array_of_commands) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - if ( NULL == array_of_maxprocs ) { + if (NULL == array_of_maxprocs) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - if ( NULL == array_of_info ) { + if (NULL == array_of_info) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_INFO, FUNC_NAME); } for (i = 0; i < count; ++i) { - if (NULL 
== array_of_info[i] || - ompi_info_is_freed(array_of_info[i])) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, - FUNC_NAME); + if (NULL == array_of_info[i] || ompi_info_is_freed(array_of_info[i])) { + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, FUNC_NAME); } /* If ompi_non_mpi is set to true on any info, it must be set to true on all of them. Note that not setting ompi_non_mpi is the same as setting it to false. */ - ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, - &flag); + ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); if (flag && 0 == i) { /* If this is the first info, save its ompi_non_mpi value */ @@ -119,32 +117,35 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o /* If this info's effective value doesn't agree with the rest of them, error */ if (cumulative != non_mpi) { - return OMPI_ERRHANDLER_NOHANDLE_INVOKE( - MPI_ERR_INFO, - FUNC_NAME); + return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_INFO, FUNC_NAME); } } - for ( i=0; i<count; i++ ) { - if ( NULL == array_of_commands[i] ) { + for (i = 0; i < count; i++) { + if (NULL == array_of_commands[i]) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } - if ( 0 > array_of_maxprocs[i] ) { + if (0 > array_of_maxprocs[i]) { return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_ARG, FUNC_NAME); } } } } + printf("MPI C\n"); + fflush(stdout); + if (!ompi_mpi_dynamics_is_enabled(FUNC_NAME)) { return OMPI_ERRHANDLER_INVOKE(comm, OMPI_ERR_NOT_SUPPORTED, FUNC_NAME); } + printf("MPI D\n"); + fflush(stdout); + if (rank == root) { if (MPI_INFO_NULL == array_of_info[0]) { non_mpi = false; } else { - ompi_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, - &flag); + ompi_info_get_bool(array_of_info[0], "ompi_non_mpi", &non_mpi, &flag); if (!flag) { non_mpi = false; } @@ -152,7 +153,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o } #if OPAL_ENABLE_FT_MPI - if( 
OPAL_UNLIKELY(!ompi_comm_iface_coll_check(comm, &rc)) ) { + if (OPAL_UNLIKELY(!ompi_comm_iface_coll_check(comm, &rc))) { return OMPI_ERRHANDLER_INVOKE(comm, rc, FUNC_NAME); } #endif @@ -160,12 +161,14 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o /* initialize the port name to avoid problems */ memset(port_name, 0, MPI_MAX_PORT_NAME); + printf("MPI E\n"); + fflush(stdout); - if ( rank == root ) { + if (rank == root) { if (!non_mpi) { /* Open a port. The port_name is passed as an environment variable to the children. */ - if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) { + if (OMPI_SUCCESS != (rc = ompi_dpm_open_port(port_name))) { goto error; } } else if (1 < ompi_comm_size(comm)) { @@ -173,41 +176,46 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o rc = OMPI_ERR_NOT_SUPPORTED; goto error; } - if (OMPI_SUCCESS != (rc = ompi_dpm_spawn(count, (const char **) array_of_commands, - array_of_argv, array_of_maxprocs, - array_of_info, port_name))) { + if (OMPI_SUCCESS + != (rc = ompi_dpm_spawn(count, (const char **) array_of_commands, array_of_argv, + array_of_maxprocs, array_of_info, port_name))) { goto error; } } + printf("MPI F\n"); + fflush(stdout); + error: + + printf("MPI G\n"); + fflush(stdout); if (OMPI_SUCCESS != rc) { /* There was an error in one of the above stages, * we still need to do the connect_accept stage so that * non-root ranks do not deadlock. * Add the error code to the port string for connect_accept * to propagate the error code. 
*/ - (void)opal_asprintf(&port_string, "%s:error=%d", port_name, rc); - } - else { + (void) opal_asprintf(&port_string, "%s:error=%d", port_name, rc); + } else { port_string = port_name; } if (non_mpi) { newcomp = MPI_COMM_NULL; } else { - rc = ompi_dpm_connect_accept (comm, root, port_string, send_first, &newcomp); + rc = ompi_dpm_connect_accept(comm, root, port_string, send_first, &newcomp); } + printf("MPI H\n"); + fflush(stdout); + if (OPAL_ERR_NOT_SUPPORTED == rc) { - opal_show_help("help-mpi-api.txt", - "MPI function not supported", - true, - FUNC_NAME, + opal_show_help("help-mpi-api.txt", "MPI function not supported", true, FUNC_NAME, "Underlying runtime environment does not support spawn functionality"); } - if(port_string != port_name) { + if (port_string != port_name) { free(port_string); } @@ -216,21 +224,26 @@ error: ompi_dpm_close_port(port_name); } + printf("MPI I\n"); + fflush(stdout); + /* set array of errorcodes */ if (MPI_ERRCODES_IGNORE != array_of_errcodes) { if (MPI_COMM_NULL != newcomp) { size = newcomp->c_remote_group->grp_proc_count; } else { - for ( i=0; i < count; i++) { + for (i = 0; i < count; i++) { size = size + array_of_maxprocs[i]; } } - for ( i=0; i < size; i++ ) { - array_of_errcodes[i]=rc; + for (i = 0; i < size; i++) { + array_of_errcodes[i] = rc; } } + printf("MPI J\n"); + fflush(stdout); + *intercomm = newcomp; - OMPI_ERRHANDLER_RETURN (rc, comm, rc, FUNC_NAME); + OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME); } - diff --git a/ompi/mpi/c/finalize.c b/ompi/mpi/c/finalize.c index be7989261b..b10f44a027 100644 --- a/ompi/mpi/c/finalize.c +++ b/ompi/mpi/c/finalize.c @@ -20,33 +20,40 @@ #include "ompi_config.h" -#include "ompi/mpi/c/bindings.h" -#include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" +#include "ompi/mpi/c/bindings.h" #include "ompi/runtime/ompi_spc.h" +#include "ompi/runtime/params.h" #if OMPI_BUILD_MPI_PROFILING -#if OPAL_HAVE_WEAK_SYMBOLS -#pragma weak MPI_Finalize = PMPI_Finalize -#endif 
-#define MPI_Finalize PMPI_Finalize +# if OPAL_HAVE_WEAK_SYMBOLS +# pragma weak MPI_Finalize = PMPI_Finalize +# endif +# define MPI_Finalize PMPI_Finalize #endif static const char FUNC_NAME[] = "MPI_Finalize"; - int MPI_Finalize(void) { /* If --with-spc and ompi_mpi_spc_dump_enabled were specified, print * all of the final SPC values aggregated across the whole MPI run. * Also, free all SPC memory. */ + + printf("FIN 1\n"); + fflush(stdout); SPC_FINI(); + printf("FIN 2\n"); + fflush(stdout); if (MPI_PARAM_CHECK) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); } + printf("FIN 3\n"); + fflush(stdout); + /* Pretty simple */ return ompi_mpi_finalize(); diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index 49ece5f9e9..f023ed8b6f 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -35,67 +35,67 @@ #include "ompi_config.h" #ifdef HAVE_SYS_TYPES_H -#include <sys/types.h> +# include <sys/types.h> #endif #ifdef HAVE_UNISTD_H -#include <unistd.h> +# include <unistd.h> #endif #ifdef HAVE_SYS_PARAM_H -#include <sys/param.h> +# include <sys/param.h> #endif #ifdef HAVE_NETDB_H -#include <netdb.h> +# include <netdb.h> #endif -#include "opal/util/event.h" -#include "opal/util/output.h" -#include "opal/runtime/opal_progress.h" +#include "opal/mca/allocator/base/base.h" #include "opal/mca/base/base.h" -#include "opal/sys/atomic.h" -#include "opal/runtime/opal.h" -#include "opal/util/show_help.h" -#include "opal/util/opal_environ.h" #include "opal/mca/mpool/base/base.h" #include "opal/mca/mpool/base/mpool_base_tree.h" -#include "opal/mca/rcache/base/base.h" -#include "opal/mca/allocator/base/base.h" #include "opal/mca/pmix/pmix-internal.h" +#include "opal/mca/rcache/base/base.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress.h" +#include "opal/sys/atomic.h" +#include "opal/util/event.h" +#include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" #include 
"opal/util/timings.h" #include "mpi.h" -#include "ompi/constants.h" -#include "ompi/errhandler/errcode.h" +#include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" +#include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" -#include "ompi/message/message.h" -#include "ompi/op/op.h" +#include "ompi/dpm/dpm.h" +#include "ompi/errhandler/errcode.h" #include "ompi/file/file.h" #include "ompi/info/info.h" -#include "ompi/runtime/mpiruntime.h" -#include "ompi/attribute/attribute.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" -#include "ompi/mca/pml/base/base.h" #include "ompi/mca/bml/base/base.h" -#include "ompi/mca/osc/base/base.h" -#include "ompi/mca/part/base/base.h" -#include "ompi/mca/coll/base/coll_base_functions.h" +#include "ompi/mca/bml/bml.h" #include "ompi/mca/coll/base/base.h" -#include "ompi/runtime/ompi_rte.h" -#include "ompi/mca/topo/base/base.h" -#include "ompi/mca/io/io.h" +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "ompi/mca/hook/base/base.h" #include "ompi/mca/io/base/base.h" +#include "ompi/mca/io/io.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/part/base/base.h" +#include "ompi/mca/pml/base/base.h" #include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/runtime/params.h" -#include "ompi/dpm/dpm.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/topo/base/base.h" +#include "ompi/message/message.h" #include "ompi/mpiext/mpiext.h" -#include "ompi/mca/hook/base/base.h" +#include "ompi/op/op.h" +#include "ompi/runtime/mpiruntime.h" +#include "ompi/runtime/ompi_rte.h" +#include "ompi/runtime/params.h" extern bool ompi_enable_timing; static void fence_cbfunc(pmix_status_t status, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + volatile bool *active = (volatile bool *) cbdata; OPAL_ACQUIRE_OBJECT(active); *active = false; OPAL_POST_OBJECT(active); @@ -105,18 +105,20 @@ int ompi_mpi_finalize(void) { int ret = MPI_SUCCESS; opal_list_item_t *item; 
- ompi_proc_t** procs; + ompi_proc_t **procs; size_t nprocs; volatile bool active; uint32_t key; - ompi_datatype_t * datatype; + ompi_datatype_t *datatype; pmix_status_t rc; + printf("FIN 4\n"); + fflush(stdout); + ompi_hook_base_mpi_finalize_top(); int32_t state = ompi_mpi_state; - if (state < OMPI_MPI_STATE_INIT_COMPLETED || - state >= OMPI_MPI_STATE_FINALIZE_STARTED) { + if (state < OMPI_MPI_STATE_INIT_COMPLETED || state >= OMPI_MPI_STATE_FINALIZE_STARTED) { /* Note that if we're not initialized or already finalized, we cannot raise an MPI error. The best that we can do is write something to stderr. */ @@ -125,13 +127,11 @@ int ompi_mpi_finalize(void) hostname = opal_gethostname(); if (state < OMPI_MPI_STATE_INIT_COMPLETED) { - opal_show_help("help-mpi-runtime.txt", - "mpi_finalize: not initialized", - true, hostname, pid); + opal_show_help("help-mpi-runtime.txt", "mpi_finalize: not initialized", true, hostname, + pid); } else if (state >= OMPI_MPI_STATE_FINALIZE_STARTED) { - opal_show_help("help-mpi-runtime.txt", - "mpi_finalize:invoked_multiple_times", - true, hostname, pid); + opal_show_help("help-mpi-runtime.txt", "mpi_finalize:invoked_multiple_times", true, + hostname, pid); } return MPI_ERR_OTHER; } @@ -140,36 +140,39 @@ int ompi_mpi_finalize(void) ompi_mpiext_fini(); + printf("FIN 5\n"); + fflush(stdout); + /* Per MPI-2:4.8, we have to free MPI_COMM_SELF before doing anything else in MPI_FINALIZE (to include setting up such that MPI_FINALIZED will return true). 
*/ if (NULL != ompi_mpi_comm_self.comm.c_keyhash) { - ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_self, - ompi_mpi_comm_self.comm.c_keyhash); + ompi_attr_delete_all(COMM_ATTR, &ompi_mpi_comm_self, ompi_mpi_comm_self.comm.c_keyhash); OBJ_RELEASE(ompi_mpi_comm_self.comm.c_keyhash); ompi_mpi_comm_self.comm.c_keyhash = NULL; } #if OPAL_ENABLE_FT_MPI - if( ompi_ftmpi_enabled ) { - ompi_communicator_t* comm = &ompi_mpi_comm_world.comm; - OPAL_OUTPUT_VERBOSE((50, ompi_ftmpi_output_handle, "FT: Rank %d entering finalize", ompi_comm_rank(comm))); + if (ompi_ftmpi_enabled) { + ompi_communicator_t *comm = &ompi_mpi_comm_world.comm; + OPAL_OUTPUT_VERBOSE( + (50, ompi_ftmpi_output_handle, "FT: Rank %d entering finalize", ompi_comm_rank(comm))); /* grpcomm barrier does not tolerate /new/ failures. Let's make sure * we drain all preexisting failures before we proceed; * TODO: when we have better failure support in the runtime, we can * remove that agreement */ - ompi_communicator_t* ncomm; + ompi_communicator_t *ncomm; ret = ompi_comm_shrink_internal(comm, &ncomm); - if( MPI_SUCCESS != ret ) { + if (MPI_SUCCESS != ret) { OMPI_ERROR_LOG(ret); goto done; } /* do a barrier with closest neighbors in the ring, using doublering as * it is synchronous and will help flush all past communications */ ret = ompi_coll_base_barrier_intra_doublering(ncomm, ncomm->c_coll->coll_barrier_module); - if( MPI_SUCCESS != ret ) { + if (MPI_SUCCESS != ret) { OMPI_ERROR_LOG(ret); goto done; } @@ -178,12 +181,14 @@ int ompi_mpi_finalize(void) /* finalize the fault tolerant infrastructure (revoke, * failure propagator, etc). From now-on we do not tolerate new failures. 
*/ - OPAL_OUTPUT_VERBOSE((50, ompi_ftmpi_output_handle, "FT: Rank %05d turning off FT", ompi_comm_rank(comm))); + OPAL_OUTPUT_VERBOSE( + (50, ompi_ftmpi_output_handle, "FT: Rank %05d turning off FT", ompi_comm_rank(comm))); ompi_comm_failure_detector_finalize(); ompi_comm_failure_propagator_finalize(); ompi_comm_revoke_finalize(); ompi_comm_rbcast_finalize(); - opal_output_verbose(40, ompi_ftmpi_output_handle, "Rank %05d: DONE WITH FINALIZE", ompi_comm_rank(comm)); + opal_output_verbose(40, ompi_ftmpi_output_handle, "Rank %05d: DONE WITH FINALIZE", + ompi_comm_rank(comm)); } #endif /* OPAL_ENABLE_FT_MPI */ @@ -193,18 +198,20 @@ int ompi_mpi_finalize(void) COMM_SELF is destroyed / all the attribute callbacks have been invoked) */ opal_atomic_wmb(); - opal_atomic_swap_32(&ompi_mpi_state, - OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT); + opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT); /* As finalize is the last legal MPI call, we are allowed to force the release * of the user buffer used for bsend, before going anywhere further. */ - (void)mca_pml_base_bsend_detach(NULL, NULL); + (void) mca_pml_base_bsend_detach(NULL, NULL); #if OPAL_ENABLE_PROGRESS_THREADS == 0 opal_progress_set_event_flag(OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK); #endif + printf("FIN 6\n"); + fflush(stdout); + /* Redo ORTE calling opal_progress_event_users_increment() during MPI lifetime, to get better latency when not using TCP */ opal_progress_event_users_increment(); @@ -293,7 +300,8 @@ int ompi_mpi_finalize(void) * communications/actions to complete. See * https://github.com/open-mpi/ompi/issues/1576 for the * original bug report. 
*/ - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_cbfunc, (void*)&active))) { + if (PMIX_SUCCESS + != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_cbfunc, (void *) &active))) { ret = opal_pmix_convert_status(rc); OMPI_ERROR_LOG(ret); /* Reset the active flag to false, to avoid waiting for @@ -303,6 +311,9 @@ int ompi_mpi_finalize(void) OMPI_LAZY_WAIT_FOR_COMPLETION(active); } + printf("FIN 7\n"); + fflush(stdout); + /* Shut down any bindings-specific issues: C++, F77, F90 */ /* Remove all memory associated by MPI_REGISTER_DATAREP (per @@ -315,16 +326,19 @@ int ompi_mpi_finalize(void) OBJ_DESTRUCT(&ompi_registered_datareps); /* Remove all F90 types from the hash tables */ - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_integer_hashtable) + OPAL_HASH_TABLE_FOREACH (key, uint32, datatype, &ompi_mpi_f90_integer_hashtable) OBJ_RELEASE(datatype); OBJ_DESTRUCT(&ompi_mpi_f90_integer_hashtable); - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_real_hashtable) + OPAL_HASH_TABLE_FOREACH (key, uint32, datatype, &ompi_mpi_f90_real_hashtable) OBJ_RELEASE(datatype); OBJ_DESTRUCT(&ompi_mpi_f90_real_hashtable); - OPAL_HASH_TABLE_FOREACH(key, uint32, datatype, &ompi_mpi_f90_complex_hashtable) + OPAL_HASH_TABLE_FOREACH (key, uint32, datatype, &ompi_mpi_f90_complex_hashtable) OBJ_RELEASE(datatype); OBJ_DESTRUCT(&ompi_mpi_f90_complex_hashtable); + printf("FIN 7a\n"); + fflush(stdout); + /* Free communication objects */ /* free file resources */ @@ -332,6 +346,9 @@ int ompi_mpi_finalize(void) goto done; } + printf("FIN 7b\n"); + fflush(stdout); + /* free window resources */ if (OMPI_SUCCESS != (ret = ompi_win_finalize())) { goto done; @@ -343,6 +360,8 @@ int ompi_mpi_finalize(void) goto done; } + printf("FIN 7c\n"); + fflush(stdout); /* free communicator resources. 
this MUST come before finalizing the PML * as this will call into the pml */ @@ -350,16 +369,19 @@ int ompi_mpi_finalize(void) goto done; } + printf("FIN 8\n"); + fflush(stdout); + /* call del_procs on all allocated procs even though some may not be known * to the pml layer. the pml layer is expected to be resilient and ignore * any unknown procs. */ nprocs = 0; - procs = ompi_proc_get_allocated (&nprocs); + procs = ompi_proc_get_allocated(&nprocs); MCA_PML_CALL(del_procs(procs, nprocs)); free(procs); /* free pml resource */ - if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) { + if (OMPI_SUCCESS != (ret = mca_pml_base_finalize())) { goto done; } @@ -380,7 +402,7 @@ int ompi_mpi_finalize(void) /* Now that all MPI objects dealing with communications are gone, shut down MCA types having to do with communications */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework) ) ) { + if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework))) { OMPI_ERROR_LOG(ret); goto done; } @@ -401,7 +423,7 @@ int ompi_mpi_finalize(void) } /* finalize the DPM subsystem */ - if ( OMPI_SUCCESS != (ret = ompi_dpm_finalize())) { + if (OMPI_SUCCESS != (ret = ompi_dpm_finalize())) { goto done; } @@ -420,6 +442,9 @@ int ompi_mpi_finalize(void) goto done; } + printf("FIN 9\n"); + fflush(stdout); + /* Free all other resources */ /* free op resources */ @@ -472,8 +497,11 @@ int ompi_mpi_finalize(void) goto done; } + printf("FIN 10\n"); + fflush(stdout); + /* free proc resources */ - if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) { + if (OMPI_SUCCESS != (ret = ompi_proc_finalize())) { goto done; } @@ -494,7 +522,7 @@ int ompi_mpi_finalize(void) ompi_rte_initialized = false; /* Now close the hook framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_hook_base_framework) ) ) { + if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_hook_base_framework))) { OMPI_ERROR_LOG(ret); goto done; } @@ -516,11 +544,19 @@ int 
ompi_mpi_finalize(void) /* All done */ - done: + printf("FIN 11\n"); + fflush(stdout); + +done: + + printf("FIN 12\n"); + fflush(stdout); opal_atomic_wmb(); opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_COMPLETED); ompi_hook_base_mpi_finalize_bottom(); + printf("FIN 13\n"); + fflush(stdout); return ret; } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 69c182899d..da5a15d639 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -37,79 +37,78 @@ #include "ompi_config.h" #ifdef HAVE_SYS_TIME_H -#include <sys/time.h> -#endif /* HAVE_SYS_TIME_H */ +# include <sys/time.h> +#endif /* HAVE_SYS_TIME_H */ #include <pthread.h> #ifdef HAVE_UNISTD_H -#include <unistd.h> +# include <unistd.h> #endif #include "mpi.h" #include "opal/class/opal_list.h" +#include "opal/mca/allocator/base/base.h" #include "opal/mca/base/base.h" +#include "opal/mca/btl/base/base.h" #include "opal/mca/hwloc/base/base.h" -#include "opal/runtime/opal_progress.h" +#include "opal/mca/mpool/base/base.h" +#include "opal/mca/pmix/base/base.h" +#include "opal/mca/rcache/base/base.h" +#include "opal/mca/rcache/rcache.h" #include "opal/mca/threads/threads.h" +#include "opal/runtime/opal.h" +#include "opal/runtime/opal_progress.h" #include "opal/util/arch.h" #include "opal/util/argv.h" -#include "opal/util/output.h" #include "opal/util/error.h" -#include "opal/util/stacktrace.h" -#include "opal/util/show_help.h" -#include "opal/runtime/opal.h" #include "opal/util/event.h" -#include "opal/mca/allocator/base/base.h" -#include "opal/mca/rcache/base/base.h" -#include "opal/mca/rcache/rcache.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/btl/base/base.h" -#include "opal/mca/pmix/base/base.h" -#include "opal/util/timings.h" #include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/util/stacktrace.h" +#include "opal/util/timings.h" -#include "ompi/constants.h" -#include 
"ompi/mpi/fortran/base/constants.h" -#include "ompi/runtime/mpiruntime.h" -#include "ompi/runtime/params.h" +#include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" -#include "ompi/info/info.h" +#include "ompi/constants.h" +#include "ompi/debuggers/debuggers.h" +#include "ompi/dpm/dpm.h" #include "ompi/errhandler/errcode.h" #include "ompi/errhandler/errhandler.h" -#include "ompi/interlib/interlib.h" -#include "ompi/request/request.h" -#include "ompi/message/message.h" -#include "ompi/op/op.h" -#include "ompi/mca/op/op.h" -#include "ompi/mca/op/base/base.h" #include "ompi/file/file.h" -#include "ompi/attribute/attribute.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" -#include "ompi/mca/pml/base/base.h" +#include "ompi/info/info.h" +#include "ompi/interlib/interlib.h" #include "ompi/mca/bml/base/base.h" -#include "ompi/mca/osc/base/base.h" -#include "ompi/mca/part/base/base.h" +#include "ompi/mca/bml/bml.h" #include "ompi/mca/coll/base/base.h" -#include "ompi/mca/io/io.h" +#include "ompi/mca/hook/base/base.h" #include "ompi/mca/io/base/base.h" -#include "ompi/runtime/ompi_rte.h" -#include "ompi/debuggers/debuggers.h" -#include "ompi/proc/proc.h" +#include "ompi/mca/io/io.h" +#include "ompi/mca/op/base/base.h" +#include "ompi/mca/op/op.h" +#include "ompi/mca/osc/base/base.h" +#include "ompi/mca/part/base/base.h" +#include "ompi/mca/pml/base/base.h" #include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/dpm/dpm.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/message/message.h" +#include "ompi/mpi/fortran/base/constants.h" #include "ompi/mpiext/mpiext.h" -#include "ompi/mca/hook/base/base.h" +#include "ompi/op/op.h" +#include "ompi/proc/proc.h" +#include "ompi/request/request.h" +#include "ompi/runtime/mpiruntime.h" +#include "ompi/runtime/ompi_rte.h" +#include "ompi/runtime/params.h" #include "ompi/util/timings.h" /* newer versions of gcc have poisoned this deprecated feature */ #ifdef HAVE___MALLOC_INITIALIZE_HOOK 
-#include "opal/mca/memory/base/base.h" +# include "opal/mca/memory/base/base.h" /* So this sucks, but with OPAL in its own library that is brought in implicity from libmpi, there are times when the malloc initialize hook in the memory component doesn't work. So we have to do it from here, since any MPI code is going to call MPI_Init... */ -OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) = - opal_memory_base_malloc_init_hook; +OPAL_DECLSPEC void (*__malloc_initialize_hook)(void) = opal_memory_base_malloc_init_hook; #endif /* This is required for the boundaries of the hash tables used to store @@ -118,9 +117,9 @@ OPAL_DECLSPEC void (*__malloc_initialize_hook) (void) = #include <float.h> #if OPAL_CC_USE_PRAGMA_IDENT -#pragma ident OMPI_IDENT_STRING +# pragma ident OMPI_IDENT_STRING #elif OPAL_CC_USE_IDENT -#ident OMPI_IDENT_STRING +# ident OMPI_IDENT_STRING #endif const char ompi_version_string[] = OMPI_IDENT_STRING; @@ -144,37 +143,35 @@ opal_thread_t *ompi_mpi_main_thread = NULL; */ ompi_predefined_datatype_t *ompi_mpi_character_addr = &ompi_mpi_character; -ompi_predefined_datatype_t *ompi_mpi_logical_addr = &ompi_mpi_logical; -ompi_predefined_datatype_t *ompi_mpi_logical1_addr = &ompi_mpi_logical1; -ompi_predefined_datatype_t *ompi_mpi_logical2_addr = &ompi_mpi_logical2; -ompi_predefined_datatype_t *ompi_mpi_logical4_addr = &ompi_mpi_logical4; -ompi_predefined_datatype_t *ompi_mpi_logical8_addr = &ompi_mpi_logical8; -ompi_predefined_datatype_t *ompi_mpi_integer_addr = &ompi_mpi_integer; -ompi_predefined_datatype_t *ompi_mpi_integer1_addr = &ompi_mpi_integer1; -ompi_predefined_datatype_t *ompi_mpi_integer2_addr = &ompi_mpi_integer2; -ompi_predefined_datatype_t *ompi_mpi_integer4_addr = &ompi_mpi_integer4; -ompi_predefined_datatype_t *ompi_mpi_integer8_addr = &ompi_mpi_integer8; +ompi_predefined_datatype_t *ompi_mpi_logical_addr = &ompi_mpi_logical; +ompi_predefined_datatype_t *ompi_mpi_logical1_addr = &ompi_mpi_logical1; +ompi_predefined_datatype_t 
*ompi_mpi_logical2_addr = &ompi_mpi_logical2; +ompi_predefined_datatype_t *ompi_mpi_logical4_addr = &ompi_mpi_logical4; +ompi_predefined_datatype_t *ompi_mpi_logical8_addr = &ompi_mpi_logical8; +ompi_predefined_datatype_t *ompi_mpi_integer_addr = &ompi_mpi_integer; +ompi_predefined_datatype_t *ompi_mpi_integer1_addr = &ompi_mpi_integer1; +ompi_predefined_datatype_t *ompi_mpi_integer2_addr = &ompi_mpi_integer2; +ompi_predefined_datatype_t *ompi_mpi_integer4_addr = &ompi_mpi_integer4; +ompi_predefined_datatype_t *ompi_mpi_integer8_addr = &ompi_mpi_integer8; ompi_predefined_datatype_t *ompi_mpi_integer16_addr = &ompi_mpi_integer16; -ompi_predefined_datatype_t *ompi_mpi_real_addr = &ompi_mpi_real; -ompi_predefined_datatype_t *ompi_mpi_real2_addr = &ompi_mpi_real2; -ompi_predefined_datatype_t *ompi_mpi_real4_addr = &ompi_mpi_real4; -ompi_predefined_datatype_t *ompi_mpi_real8_addr = &ompi_mpi_real8; -ompi_predefined_datatype_t *ompi_mpi_real16_addr = &ompi_mpi_real16; -ompi_predefined_datatype_t *ompi_mpi_dblprec_addr = &ompi_mpi_dblprec; -ompi_predefined_datatype_t *ompi_mpi_cplex_addr = &ompi_mpi_cplex; -ompi_predefined_datatype_t *ompi_mpi_complex4_addr = &ompi_mpi_complex4; -ompi_predefined_datatype_t *ompi_mpi_complex8_addr = &ompi_mpi_complex8; +ompi_predefined_datatype_t *ompi_mpi_real_addr = &ompi_mpi_real; +ompi_predefined_datatype_t *ompi_mpi_real2_addr = &ompi_mpi_real2; +ompi_predefined_datatype_t *ompi_mpi_real4_addr = &ompi_mpi_real4; +ompi_predefined_datatype_t *ompi_mpi_real8_addr = &ompi_mpi_real8; +ompi_predefined_datatype_t *ompi_mpi_real16_addr = &ompi_mpi_real16; +ompi_predefined_datatype_t *ompi_mpi_dblprec_addr = &ompi_mpi_dblprec; +ompi_predefined_datatype_t *ompi_mpi_cplex_addr = &ompi_mpi_cplex; +ompi_predefined_datatype_t *ompi_mpi_complex4_addr = &ompi_mpi_complex4; +ompi_predefined_datatype_t *ompi_mpi_complex8_addr = &ompi_mpi_complex8; ompi_predefined_datatype_t *ompi_mpi_complex16_addr = &ompi_mpi_complex16; ompi_predefined_datatype_t 
*ompi_mpi_complex32_addr = &ompi_mpi_complex32; -ompi_predefined_datatype_t *ompi_mpi_dblcplex_addr = &ompi_mpi_dblcplex; -ompi_predefined_datatype_t *ompi_mpi_2real_addr = &ompi_mpi_2real; -ompi_predefined_datatype_t *ompi_mpi_2dblprec_addr = &ompi_mpi_2dblprec; -ompi_predefined_datatype_t *ompi_mpi_2integer_addr = &ompi_mpi_2integer; +ompi_predefined_datatype_t *ompi_mpi_dblcplex_addr = &ompi_mpi_dblcplex; +ompi_predefined_datatype_t *ompi_mpi_2real_addr = &ompi_mpi_2real; +ompi_predefined_datatype_t *ompi_mpi_2dblprec_addr = &ompi_mpi_2dblprec; +ompi_predefined_datatype_t *ompi_mpi_2integer_addr = &ompi_mpi_2integer; -struct ompi_status_public_t *ompi_mpi_status_ignore_addr = - (ompi_status_public_t *) 0; -struct ompi_status_public_t *ompi_mpi_statuses_ignore_addr = - (ompi_status_public_t *) 0; +struct ompi_status_public_t *ompi_mpi_status_ignore_addr = (ompi_status_public_t *) 0; +struct ompi_status_public_t *ompi_mpi_statuses_ignore_addr = (ompi_status_public_t *) 0; /* * These variables are here, rather than under ompi/mpi/c/foo.c @@ -214,26 +211,25 @@ struct ompi_status_public_t *ompi_mpi_statuses_ignore_addr = * complain. 
*/ #if OMPI_BUILD_FORTRAN_BINDINGS -# if OMPI_FORTRAN_CAPS -MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUS_IGNORE; -MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUSES_IGNORE; -# elif OMPI_FORTRAN_PLAIN -MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore; -MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore; -# elif OMPI_FORTRAN_SINGLE_UNDERSCORE -MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore_; -MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore_; -# elif OMPI_FORTRAN_DOUBLE_UNDERSCORE -MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore__; -MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore__; -# else -# error Unrecognized Fortran name mangling scheme -# endif +# if OMPI_FORTRAN_CAPS +MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint *) &MPI_FORTRAN_STATUS_IGNORE; +MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint *) &MPI_FORTRAN_STATUSES_IGNORE; +# elif OMPI_FORTRAN_PLAIN +MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint *) &mpi_fortran_status_ignore; +MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint *) &mpi_fortran_statuses_ignore; +# elif OMPI_FORTRAN_SINGLE_UNDERSCORE +MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint *) &mpi_fortran_status_ignore_; +MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint *) &mpi_fortran_statuses_ignore_; +# elif OMPI_FORTRAN_DOUBLE_UNDERSCORE +MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint *) &mpi_fortran_status_ignore__; +MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint *) &mpi_fortran_statuses_ignore__; +# else +# error Unrecognized Fortran name mangling scheme +# endif #else MPI_Fint *MPI_F_STATUS_IGNORE = NULL; MPI_Fint *MPI_F_STATUSES_IGNORE = NULL; -#endif /* OMPI_BUILD_FORTRAN_BINDINGS */ - +#endif /* OMPI_BUILD_FORTRAN_BINDINGS */ /* Constants for the Fortran layer. These values are referred to via common blocks in the Fortran equivalents. 
See @@ -286,29 +282,25 @@ extern int ompi_mpi_event_tick_rate; * Static functions used to configure the interactions between the OPAL and * the runtime. */ -static char* -_process_name_print_for_opal(const opal_process_name_t procname) +static char *_process_name_print_for_opal(const opal_process_name_t procname) { - ompi_process_name_t* rte_name = (ompi_process_name_t*)&procname; + ompi_process_name_t *rte_name = (ompi_process_name_t *) &procname; return OMPI_NAME_PRINT(rte_name); } -static int -_process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2) +static int _process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2) { - ompi_process_name_t* o1 = (ompi_process_name_t*)&p1; - ompi_process_name_t* o2 = (ompi_process_name_t*)&p2; + ompi_process_name_t *o1 = (ompi_process_name_t *) &p1; + ompi_process_name_t *o2 = (ompi_process_name_t *) &p2; return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, o1, o2); } -static int _convert_string_to_process_name(opal_process_name_t *name, - const char* name_string) +static int _convert_string_to_process_name(opal_process_name_t *name, const char *name_string) { return ompi_rte_convert_string_to_process_name(name, name_string); } -static int _convert_process_name_to_string(char** name_string, - const opal_process_name_t *name) +static int _convert_process_name_to_string(char **name_string, const opal_process_name_t *name) { return ompi_rte_convert_process_name_to_string(name_string, name); } @@ -332,8 +324,7 @@ void ompi_mpi_thread_level(int requested, int *provided) ompi_mpi_main_thread = opal_thread_get_self(); } - ompi_mpi_thread_multiple = (ompi_mpi_thread_provided == - MPI_THREAD_MULTIPLE); + ompi_mpi_thread_multiple = (ompi_mpi_thread_provided == MPI_THREAD_MULTIPLE); } static int ompi_register_mca_variables(void) @@ -352,10 +343,8 @@ static int ompi_register_mca_variables(void) ompi_enable_timing = false; (void) mca_base_var_register("ompi", "ompi", NULL, "timing", "Request 
that critical timing loops be measured", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_enable_timing); + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &ompi_enable_timing); #if OPAL_ENABLE_FT_MPI /* Before loading any other part of the MPI library, we need to load @@ -363,7 +352,7 @@ static int ompi_register_mca_variables(void) * FT is desired ON; this does override openmpi-params.conf, but not * command line or env. */ - if( ompi_ftmpi_enabled ) { + if (ompi_ftmpi_enabled) { mca_base_var_load_extra_files("ft-mpi", false); } #endif /* OPAL_ENABLE_FT_MPI */ @@ -373,34 +362,30 @@ static int ompi_register_mca_variables(void) static void fence_release(pmix_status_t status, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; + volatile bool *active = (volatile bool *) cbdata; OPAL_ACQUIRE_OBJECT(active); *active = false; OPAL_POST_OBJECT(active); } -static void evhandler_reg_callbk(pmix_status_t status, - size_t evhandler_ref, - void *cbdata) +static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) { - opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; + opal_pmix_lock_t *lock = (opal_pmix_lock_t *) cbdata; lock->status = status; OPAL_PMIX_WAKEUP_THREAD(lock); } - -int ompi_mpi_init(int argc, char **argv, int requested, int *provided, - bool reinit_ok) +int ompi_mpi_init(int argc, char **argv, int requested, int *provided, bool reinit_ok) { int ret; - ompi_proc_t** procs; + ompi_proc_t **procs; size_t nprocs; char *error = NULL; volatile bool active; bool background_fence = false; pmix_info_t info[2]; - pmix_status_t codes[1] = { PMIX_ERR_PROC_ABORTED }; + pmix_status_t codes[1] = {PMIX_ERR_PROC_ABORTED}; pmix_status_t rc; OMPI_TIMING_INIT(64); opal_pmix_lock_t mylock; @@ -410,16 +395,14 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, /* Ensure that we were not already initialized or finalized. 
*/ int32_t expected = OMPI_MPI_STATE_NOT_INITIALIZED; - int32_t desired = OMPI_MPI_STATE_INIT_STARTED; + int32_t desired = OMPI_MPI_STATE_INIT_STARTED; opal_atomic_wmb(); - if (!opal_atomic_compare_exchange_strong_32(&ompi_mpi_state, &expected, - desired)) { + if (!opal_atomic_compare_exchange_strong_32(&ompi_mpi_state, &expected, desired)) { // If we failed to atomically transition ompi_mpi_state from // NOT_INITIALIZED to INIT_STARTED, then someone else already // did that, and we should return. if (expected >= OMPI_MPI_STATE_FINALIZE_STARTED) { - opal_show_help("help-mpi-runtime.txt", - "mpi_init: already finalized", true); + opal_show_help("help-mpi-runtime.txt", "mpi_init: already finalized", true); return MPI_ERR_OTHER; } else if (expected >= OMPI_MPI_STATE_INIT_STARTED) { // In some cases (e.g., oshmem_shmem_init()), we may call @@ -433,8 +416,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, return MPI_SUCCESS; } - opal_show_help("help-mpi-runtime.txt", - "mpi_init: invoked multiple times", true); + opal_show_help("help-mpi-runtime.txt", "mpi_init: invoked multiple times", true); return MPI_ERR_OTHER; } } @@ -508,7 +490,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, goto error; } - if (OPAL_SUCCESS != (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) { + if (OPAL_SUCCESS + != (ret = opal_arch_set_fortran_logical_size(sizeof(ompi_fortran_logical_t)))) { error = "ompi_mpi_init: opal_arch_set_fortran_logical_size failed"; goto error; } @@ -547,7 +530,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided); - OMPI_TIMING_NEXT("initialization"); /* Setup RTE */ @@ -567,7 +549,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, /* give it a name so we can distinguish it */ PMIX_INFO_LOAD(&info[1], PMIX_EVENT_HDLR_NAME, "MPI-Default", PMIX_STRING); 
OPAL_PMIX_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(codes, 1, info, 2, ompi_errhandler_callback, evhandler_reg_callbk, (void*)&mylock); + PMIx_Register_event_handler(codes, 1, info, 2, ompi_errhandler_callback, evhandler_reg_callbk, + (void *) &mylock); OPAL_PMIX_WAIT_THREAD(&mylock); rc = mylock.status; OPAL_PMIX_DESTRUCT_LOCK(&mylock); @@ -609,9 +592,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, error = "ompi_op_base_open() failed"; goto error; } - if (OMPI_SUCCESS != - (ret = ompi_op_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { + if (OMPI_SUCCESS + != (ret = ompi_op_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, + ompi_mpi_thread_multiple))) { error = "ompi_op_base_find_available() failed"; goto error; } @@ -638,7 +621,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, error = "mca_bml_base_open() failed"; goto error; } - if (OMPI_SUCCESS != (ret = mca_bml_base_init (1, ompi_mpi_thread_multiple))) { + if (OMPI_SUCCESS != (ret = mca_bml_base_init(1, ompi_mpi_thread_multiple))) { error = "mca_bml_base_init() failed"; goto error; } @@ -655,7 +638,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, error = "ompi_osc_base_open() failed"; goto error; } - + if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_part_base_framework, 0))) { error = "ompi_part_base_open() failed"; goto error; @@ -669,9 +652,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, /* Select which MPI components to use */ - if (OMPI_SUCCESS != - (ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { + if (OMPI_SUCCESS + != (ret = mca_pml_base_select(OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { error = "mca_pml_base_select() failed"; goto error; } @@ -691,8 +673,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, } OMPI_TIMING_NEXT("commit"); #if (OPAL_ENABLE_TIMING) - 
if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && - opal_pmix_collect_all_data && !ompi_singleton) { + if (OMPI_TIMING_ENABLED && !opal_pmix_base_async_modex && opal_pmix_collect_all_data + && !ompi_singleton) { if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) { ret - opal_pmix_convert_status(rc); error = "timing: pmix-barrier-1 failed"; @@ -723,9 +705,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, - fence_release, - (void*)&active))) { + if (PMIX_SUCCESS + != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_release, (void *) &active))) { ret = opal_pmix_convert_status(rc); error = "PMIx_Fence_nb() failed"; goto error; @@ -739,8 +720,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); - if( PMIX_SUCCESS != rc) { + rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void *) &active); + if (PMIX_SUCCESS != rc) { ret = opal_pmix_convert_status(rc); error = "PMIx_Fence() failed"; goto error; @@ -753,30 +734,27 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, OMPI_TIMING_NEXT("modex"); /* select buffered send allocator component to be used */ - if( OMPI_SUCCESS != - (ret = mca_pml_base_bsend_init(ompi_mpi_thread_multiple))) { + if (OMPI_SUCCESS != (ret = mca_pml_base_bsend_init(ompi_mpi_thread_multiple))) { error = "mca_pml_base_bsend_init() failed"; goto error; } - if (OMPI_SUCCESS != - (ret = mca_coll_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { + if (OMPI_SUCCESS + != (ret = mca_coll_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, + ompi_mpi_thread_multiple))) { 
error = "mca_coll_base_find_available() failed"; goto error; } - if (OMPI_SUCCESS != - (ret = ompi_osc_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { + if (OMPI_SUCCESS + != (ret = ompi_osc_base_find_available(OPAL_ENABLE_PROGRESS_THREADS, + ompi_mpi_thread_multiple))) { error = "ompi_osc_base_find_available() failed"; goto error; } - - if (OMPI_SUCCESS != - (ret = mca_part_base_select(OPAL_ENABLE_PROGRESS_THREADS, - ompi_mpi_thread_multiple))) { + if (OMPI_SUCCESS + != (ret = mca_part_base_select(OPAL_ENABLE_PROGRESS_THREADS, ompi_mpi_thread_multiple))) { error = "mca_part_base_select() failed"; goto error; } @@ -860,7 +838,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, /* start PML/BTL's */ ret = MCA_PML_CALL(enable(true)); - if( OMPI_SUCCESS != ret ) { + if (OMPI_SUCCESS != ret) { error = "PML control failed"; goto error; } @@ -868,15 +846,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, /* some btls/mtls require we call add_procs with all procs in the job. * since the btls/mtls have no visibility here it is up to the pml to * convey this requirement */ - if (mca_pml_base_requires_world ()) { - if (NULL == (procs = ompi_proc_world (&nprocs))) { + if (mca_pml_base_requires_world()) { + if (NULL == (procs = ompi_proc_world(&nprocs))) { error = "ompi_proc_world () failed"; goto error; } } else { /* add all allocated ompi_proc_t's to PML (below the add_procs limit this * behaves identically to ompi_proc_world ()) */ - if (NULL == (procs = ompi_proc_get_allocated (&nprocs))) { + if (NULL == (procs = ompi_proc_get_allocated(&nprocs))) { error = "ompi_proc_get_allocated () failed"; goto error; } @@ -887,8 +865,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, Otherwise, if we got some other failure, fall through to print a generic message. 
*/ if (OMPI_ERR_UNREACH == ret) { - opal_show_help("help-mpi-runtime.txt", - "mpi_init:startup:pml-add-procs-fail", true); + opal_show_help("help-mpi-runtime.txt", "mpi_init:startup:pml-add-procs-fail", true); error = NULL; goto error; } else if (OMPI_SUCCESS != ret) { @@ -902,19 +879,23 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, #if OPAL_ENABLE_FT_MPI /* initialize the fault tolerant infrastructure (revoke, detector, * propagator) */ - if( ompi_ftmpi_enabled ) { + if (ompi_ftmpi_enabled) { const char *evmethod; rc = ompi_comm_rbcast_init(); - if( OMPI_SUCCESS != rc ) return rc; + if (OMPI_SUCCESS != rc) + return rc; rc = ompi_comm_revoke_init(); - if( OMPI_SUCCESS != rc ) return rc; + if (OMPI_SUCCESS != rc) + return rc; rc = ompi_comm_failure_propagator_init(); - if( OMPI_SUCCESS != rc ) return rc; + if (OMPI_SUCCESS != rc) + return rc; rc = ompi_comm_failure_detector_init(); - if( OMPI_SUCCESS != rc ) return rc; + if (OMPI_SUCCESS != rc) + return rc; evmethod = event_base_get_method(opal_sync_event_base); - if( 0 == strcmp("select", evmethod) ) { + if (0 == strcmp("select", evmethod)) { opal_show_help("help-mpi-ft.txt", "module:event:selectbug", true); } } @@ -924,8 +905,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, * Dump all MCA parameters if requested */ if (ompi_mpi_show_mca_params) { - ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank, - nprocs, + ompi_show_all_mca_params(ompi_mpi_comm_world.comm.c_my_rank, nprocs, ompi_process_info.nodename); } @@ -949,8 +929,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, - fence_release, (void*)&active))) { + if (PMIX_SUCCESS + != (rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void *) &active))) { ret = opal_pmix_convert_status(rc); error = 
"PMIx_Fence_nb() failed"; goto error; @@ -994,7 +974,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, e.g. hierarch, might create subcommunicators. The threadlevel requested by all processes is required in order to know which cid allocation algorithm can be used. */ - if (OMPI_SUCCESS != ( ret = ompi_comm_cid_init ())) { + if (OMPI_SUCCESS != (ret = ompi_comm_cid_init())) { error = "ompi_mpi_init: ompi_comm_cid_init failed"; goto error; } @@ -1003,14 +983,12 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, (since dpm.mark_dyncomm is not set in the communicator creation function else), but before dpm.dyncom_init, since this function might require collective for the CID allocation. */ - if (OMPI_SUCCESS != - (ret = mca_coll_base_comm_select(MPI_COMM_WORLD))) { + if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(MPI_COMM_WORLD))) { error = "mca_coll_base_comm_select(MPI_COMM_WORLD) failed"; goto error; } - if (OMPI_SUCCESS != - (ret = mca_coll_base_comm_select(MPI_COMM_SELF))) { + if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(MPI_COMM_SELF))) { error = "mca_coll_base_comm_select(MPI_COMM_SELF) failed"; goto error; } @@ -1050,20 +1028,20 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, #if OPAL_ENABLE_FT_MPI /* start the failure detector */ - if( ompi_ftmpi_enabled ) { + if (ompi_ftmpi_enabled) { rc = ompi_comm_failure_detector_start(); - if( OMPI_SUCCESS != rc ) return rc; + if (OMPI_SUCCESS != rc) + return rc; } #endif /* Fall through */ - error: +error: if (ret != OMPI_SUCCESS) { /* Only print a message if one was not already printed */ if (NULL != error && OMPI_ERR_SILENT != ret) { const char *err_msg = opal_strerror(ret); - opal_show_help("help-mpi-runtime.txt", - "mpi_init:startup:internal-failure", true, + opal_show_help("help-mpi-runtime.txt", "mpi_init:startup:internal-failure", true, "MPI_INIT", "MPI_INIT", error, err_msg, ret); } ompi_hook_base_mpi_init_error(argc, argv, 
requested, provided); @@ -1077,13 +1055,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, /* Initialize the arrays used to store the F90 types returned by the * MPI_Type_create_f90_XXX functions. */ - OBJ_CONSTRUCT( &ompi_mpi_f90_integer_hashtable, opal_hash_table_t); + OBJ_CONSTRUCT(&ompi_mpi_f90_integer_hashtable, opal_hash_table_t); opal_hash_table_init(&ompi_mpi_f90_integer_hashtable, 16 /* why not? */); - OBJ_CONSTRUCT( &ompi_mpi_f90_real_hashtable, opal_hash_table_t); + OBJ_CONSTRUCT(&ompi_mpi_f90_real_hashtable, opal_hash_table_t); opal_hash_table_init(&ompi_mpi_f90_real_hashtable, FLT_MAX_10_EXP); - OBJ_CONSTRUCT( &ompi_mpi_f90_complex_hashtable, opal_hash_table_t); + OBJ_CONSTRUCT(&ompi_mpi_f90_complex_hashtable, opal_hash_table_t); opal_hash_table_init(&ompi_mpi_f90_complex_hashtable, FLT_MAX_10_EXP); /* All done. Wasn't that simple? */ diff --git a/rank-swapper-agent/hello_c.c b/rank-swapper-agent/hello_c.c index 530e135572..e064cd202d 100644 --- a/rank-swapper-agent/hello_c.c +++ b/rank-swapper-agent/hello_c.c @@ -84,8 +84,8 @@ int main(int argc, char *argv[]) // (instead of Comm_multiple) spawns a second, different intercommunicator } else if (2 == SPAWN_MODE) { - int np = 1; - int errcodes[1]; + int np = 2; + int errcodes[2]; MPI_Comm parentcomm; MPI_Comm intercomm[2]; MPI_Comm_get_parent(&parentcomm); @@ -107,7 +107,7 @@ int main(int argc, char *argv[]) } else if (3 == SPAWN_MODE) { - int np[3] = {1, 1, 1}; + int np[2] = {2, 1}; int errcodes[3]; MPI_Comm parentcomm, intercomm; char *cmds[3] = {"/home/ompi/rank-swapper-agent/hello", @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) MPI_Comm_get_parent(&parentcomm); if (parentcomm == MPI_COMM_NULL) { // Create n more processes using the "hello" executable - MPI_Comm_spawn_multiple(3, cmds, MPI_ARGVS_NULL, np, infos, 0, MPI_COMM_WORLD, + MPI_Comm_spawn_multiple(2, cmds, MPI_ARGVS_NULL, np, infos, 0, MPI_COMM_WORLD, &intercomm, errcodes); printf("I'm the parent.\n"); 
fflush(stdout); -- GitLab