Skip to content
Snippets Groups Projects
Commit 1bd253ce authored by René Pascal Becker's avatar René Pascal Becker
Browse files

[WIP] DBG, Init adjustments, Test changes 23.10

parent 98fa06fd
No related branches found
No related tags found
No related merge requests found
...@@ -61,6 +61,7 @@ ...@@ -61,6 +61,7 @@
#define BUFFLEN 128 #define BUFFLEN 128
#define JOBID_ENV_VAR "SLURM_VRM_JOBID" #define JOBID_ENV_VAR "SLURM_VRM_JOBID"
#define SLURM_JOBID_ENV_VAR "SLURM_JOB_ID"
/* /*
** Table for Fortran <-> C communicator handle conversion ** Table for Fortran <-> C communicator handle conversion
...@@ -146,6 +147,7 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v ...@@ -146,6 +147,7 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v
pid_t pid = getpid(); pid_t pid = getpid();
// Add VRM JobID
const char* vrm_jobid = getenv(JOBID_ENV_VAR); const char* vrm_jobid = getenv(JOBID_ENV_VAR);
char vrm_jobid_with_leading_comma[sizeof(uint64_t) + 1] = ""; char vrm_jobid_with_leading_comma[sizeof(uint64_t) + 1] = "";
if (NULL != vrm_jobid) { if (NULL != vrm_jobid) {
...@@ -155,11 +157,21 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v ...@@ -155,11 +157,21 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v
printf("TEST: %s", vrm_jobid_with_leading_comma); printf("TEST: %s", vrm_jobid_with_leading_comma);
} }
// Add Slurm JobID
const char* slurm_jobid = getenv(SLURM_JOBID_ENV_VAR);
char slurm_jobid_str[sizeof(uint64_t) + 1] = "";
if (NULL != slurm_jobid) {
char comma = ',';
strncat(slurm_jobid_str, &comma, 1);
strcat(slurm_jobid_str, slurm_jobid);
printf("TEST JobID: %s", slurm_jobid_str);
}
char info_to_send[BUFFLEN]; char info_to_send[BUFFLEN];
memset(info_to_send, 0, BUFFLEN); memset(info_to_send, 0, BUFFLEN);
snprintf(info_to_send, BUFFLEN, snprintf(info_to_send, BUFFLEN,
"{\"msg_type\": 128, \"msg_data\": \"%d,%u,%u,%zu%s\"}", "{\"msg_type\": 128, \"msg_data\": \"%d,%u,%u,%zu%s%s\"}",
pid, vpid, jobid, size, vrm_jobid_with_leading_comma); pid, vpid, jobid, size, vrm_jobid_with_leading_comma, slurm_jobid_str);
uint32_t msg_length = strlen(info_to_send) + 1; uint32_t msg_length = strlen(info_to_send) + 1;
// Ensure that little endian is used for communinication (by convention with server) // Ensure that little endian is used for communinication (by convention with server)
...@@ -179,6 +191,7 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v ...@@ -179,6 +191,7 @@ static int get_modified_ranks(uint32_t jobid, uint32_t vpid, size_t size, opal_v
// receive the actual message content // receive the actual message content
recv(socket_fd, rank_to_recv, answer_length, 0); recv(socket_fd, rank_to_recv, answer_length, 0);
printf("Received from server: %s\n", rank_to_recv); printf("Received from server: %s\n", rank_to_recv);
fflush(stdout);
// look for msg_data field in JSON string // look for msg_data field in JSON string
const char msg_data_key[] = "msg_data"; const char msg_data_key[] = "msg_data";
......
...@@ -114,6 +114,8 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf ...@@ -114,6 +114,8 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf
/* initialize the port name to avoid problems */ /* initialize the port name to avoid problems */
memset(port_name, 0, MPI_MAX_PORT_NAME); memset(port_name, 0, MPI_MAX_PORT_NAME);
printf("MPI E\n"); fflush(stdout);
/* See if the info key "ompi_non_mpi" was set to true */ /* See if the info key "ompi_non_mpi" was set to true */
if (rank == root) { if (rank == root) {
...@@ -124,22 +126,34 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf ...@@ -124,22 +126,34 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf
if (!non_mpi) { if (!non_mpi) {
/* Open a port. The port_name is passed as an environment /* Open a port. The port_name is passed as an environment
variable to the children. */ variable to the children. */
printf("NON_MPI\n");
fflush(stdout);
if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) { if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) {
goto error; goto error;
} }
} else if (1 < ompi_comm_size(comm)) { } else if (1 < ompi_comm_size(comm)) {
printf("OMPI_COMM_SIZE\n");
fflush(stdout);
/* we do not support non_mpi spawns on comms this size */ /* we do not support non_mpi spawns on comms this size */
rc = OMPI_ERR_NOT_SUPPORTED; rc = OMPI_ERR_NOT_SUPPORTED;
goto error; goto error;
} }
printf("SPAWN\n");
fflush(stdout);
if (OMPI_SUCCESS != (rc = ompi_dpm_spawn (1, &command, &argv, &maxprocs, if (OMPI_SUCCESS != (rc = ompi_dpm_spawn (1, &command, &argv, &maxprocs,
&info, port_name))) { &info, port_name))) {
goto error; goto error;
} }
} } printf("MPI F\n");
fflush(stdout);
error: printf("MPI G\n");
fflush(stdout);
error:
if (OMPI_SUCCESS != rc) { if (OMPI_SUCCESS != rc) {
printf("NO SUCCESS\n");
fflush(stdout);
/* There was an error in one of the above stages, /* There was an error in one of the above stages,
* we still need to do the connect_accept stage so that * we still need to do the connect_accept stage so that
* non-root ranks do not deadlock. * non-root ranks do not deadlock.
...@@ -158,11 +172,13 @@ error: ...@@ -158,11 +172,13 @@ error:
} }
if (OPAL_ERR_NOT_SUPPORTED == rc) { if (OPAL_ERR_NOT_SUPPORTED == rc) {
printf("NOT SUPPORTED\n");
opal_show_help("help-mpi-api.txt", opal_show_help("help-mpi-api.txt",
"MPI function not supported", "MPI function not supported",
true, true,
FUNC_NAME, FUNC_NAME,
"Underlying runtime environment does not support spawn functionality"); "Underlying runtime environment does not support spawn functionality");
fflush(stdout);
} }
if(port_string != port_name) { if(port_string != port_name) {
...@@ -181,6 +197,9 @@ error: ...@@ -181,6 +197,9 @@ error:
} }
} }
printf("HANDLE ERROR %d", rc);
fflush(stdout);
*intercomm = newcomp; *intercomm = newcomp;
OMPI_ERRHANDLER_RETURN (rc, comm, rc, FUNC_NAME); OMPI_ERRHANDLER_RETURN (rc, comm, rc, FUNC_NAME);
} }
...@@ -17,13 +17,15 @@ ...@@ -17,13 +17,15 @@
#define BUFFLEN 64 #define BUFFLEN 64
extern char **environ;
/* Spawn Modes (dynamic job spawning): /* Spawn Modes (dynamic job spawning):
* 1: Spawn just one process (in one job) * 1: Spawn just one process (in one job)
* 2: Spawn 2 processes in 2 different jobs * 2: Spawn 2 processes in 2 different jobs
* 3: Spawn 2 prcoesses in one (shared) job * 3: Spawn 2 prcoesses in one (shared) job
* 0 or other: Do not spawn a dynamic process/job * 0 or other: Do not spawn a dynamic process/job
*/ */
#define SPAWN_MODE 0 #define SPAWN_MODE 3
void errorExit(char* msg); void errorExit(char* msg);
...@@ -59,6 +61,20 @@ void errorExit(char* msg); ...@@ -59,6 +61,20 @@ void errorExit(char* msg);
}*/ }*/
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
FILE* ptr = fopen("/home/test_out", "a");
fprintf(ptr,"TEST APP STARTED\n");
char **s = environ;
// PRINT ENV
for (; *s; s++) {
fprintf(ptr, "%s\n", *s);
}
char* parentPort = getenv("OMPI_PARENT_PORT");
if (parentPort) {
fprintf(ptr, "Hey we have a parent port! %s", parentPort);
}
fclose(ptr);
int rank, size, len; int rank, size, len;
pid_t pid; pid_t pid;
...@@ -73,6 +89,7 @@ int main(int argc, char* argv[]) { ...@@ -73,6 +89,7 @@ int main(int argc, char* argv[]) {
//notifyProcessAgent(pid, rank, "Spawned"); //notifyProcessAgent(pid, rank, "Spawned");
printf("Hello, world, I am %d of %d, PID: %d\n", rank, size, pid); printf("Hello, world, I am %d of %d, PID: %d\n", rank, size, pid);
fflush(stdout);
// dynamically spawn child process // dynamically spawn child process
// https://mpi.deino.net/mpi_functions/MPI_Comm_spawn.html // https://mpi.deino.net/mpi_functions/MPI_Comm_spawn.html
...@@ -83,7 +100,7 @@ int main(int argc, char* argv[]) { ...@@ -83,7 +100,7 @@ int main(int argc, char* argv[]) {
MPI_Comm parentcomm, intercomm; MPI_Comm parentcomm, intercomm;
MPI_Comm_get_parent( &parentcomm ); MPI_Comm_get_parent( &parentcomm );
if (parentcomm == MPI_COMM_NULL) { if (parentcomm == MPI_COMM_NULL) {
MPI_Comm_spawn( "hello", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_Comm_spawn( "/home/ompi/rank-swapper-agent/hello", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0,
MPI_COMM_WORLD, &intercomm, errcodes ); MPI_COMM_WORLD, &intercomm, errcodes );
printf("I'm the parent.\n"); printf("I'm the parent.\n");
} else { } else {
...@@ -102,7 +119,7 @@ int main(int argc, char* argv[]) { ...@@ -102,7 +119,7 @@ int main(int argc, char* argv[]) {
MPI_Comm_get_parent( &parentcomm ); MPI_Comm_get_parent( &parentcomm );
if (parentcomm == MPI_COMM_NULL) { if (parentcomm == MPI_COMM_NULL) {
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
MPI_Comm_spawn( "hello", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0, MPI_Comm_spawn( "/home/ompi/rank-swapper-agent/hello", MPI_ARGV_NULL, np, MPI_INFO_NULL, 0,
MPI_COMM_WORLD, &intercomm, errcodes ); MPI_COMM_WORLD, &intercomm, errcodes );
if (0 != errcodes[0]) { if (0 != errcodes[0]) {
printf("ERROR_SPAWN: code: %d\n", errcodes[0]); printf("ERROR_SPAWN: code: %d\n", errcodes[0]);
...@@ -118,7 +135,7 @@ int main(int argc, char* argv[]) { ...@@ -118,7 +135,7 @@ int main(int argc, char* argv[]) {
int np[2] = { 1, 1 }; int np[2] = { 1, 1 };
int errcodes[2]; int errcodes[2];
MPI_Comm parentcomm, intercomm; MPI_Comm parentcomm, intercomm;
char *cmds[2] = { "hello", "hello" }; char *cmds[2] = { "/home/ompi/rank-swapper-agent/hello", "/home/ompi/rank-swapper-agent/hello" };
MPI_Info infos[2] = { MPI_INFO_NULL, MPI_INFO_NULL }; MPI_Info infos[2] = { MPI_INFO_NULL, MPI_INFO_NULL };
MPI_Comm_get_parent( &parentcomm ); MPI_Comm_get_parent( &parentcomm );
if (parentcomm == MPI_COMM_NULL) { if (parentcomm == MPI_COMM_NULL) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment