Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
Master Thesis Custom SLURM
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
becker29
Master Thesis Custom SLURM
Commits
bbe6b3e4
Commit
bbe6b3e4
authored
1 year ago
by
René Pascal Becker
Browse files
Options
Downloads
Patches
Plain Diff
Fix minor memory issues
parent
d57f78d7
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/plugins/mpi/pmix/pmixp_client_v2.c
+1
-0
1 addition, 0 deletions
src/plugins/mpi/pmix/pmixp_client_v2.c
src/plugins/mpi/pmix/pmixp_spawn.c
+25
-14
25 additions, 14 deletions
src/plugins/mpi/pmix/pmixp_spawn.c
with
26 additions
and
14 deletions
src/plugins/mpi/pmix/pmixp_client_v2.c
+
1
−
0
View file @
bbe6b3e4
...
...
@@ -112,6 +112,7 @@ static pmix_status_t _fencenb_fn(const pmix_proc_t procs_v2[], size_t nprocs,
const
pmix_info_t
info
[],
size_t
ninfo
,
char
*
data
,
size_t
ndata
,
pmix_modex_cbfunc_t
cbfunc
,
void
*
cbdata
)
{
THESIS_LOG
int
ret
=
PMIX_SUCCESS
;
size_t
i
;
pmix_proc_t
*
procs
=
xmalloc
(
sizeof
(
*
procs
)
*
nprocs
);
...
...
This diff is collapsed.
Click to expand it.
src/plugins/mpi/pmix/pmixp_spawn.c
+
25
−
14
View file @
bbe6b3e4
...
...
@@ -133,7 +133,8 @@ int _connect_to_dpm_agent() {
char
*
_create_message
(
int
message_id
,
char
*
data
)
{
const
int
padding_size
=
64
;
const
int
msg_size
=
strlen
(
data
)
+
padding_size
;
char
*
msg
=
(
char
*
)
xmalloc
(
msg_size
);
char
*
msg
=
(
char
*
)
malloc
(
msg_size
);
memset
(
msg
,
0
,
msg_size
);
snprintf
(
msg
,
msg_size
,
"{
\"
msg_type
\"
: %d,
\"
msg_data
\"
:
\"
%s
\"
}"
,
message_id
,
data
);
...
...
@@ -141,7 +142,7 @@ char *_create_message(int message_id, char *data) {
return
msg
;
}
void
_destroy_message
(
char
*
msg
)
{
x
free
(
msg
);
}
void
_destroy_message
(
char
*
msg
)
{
free
(
msg
);
}
int
_send_message
(
int
socket_fd
,
int
message_id
,
char
*
data
)
{
int
result
=
0
;
...
...
@@ -165,12 +166,21 @@ sendMessageFail:
char
*
_receive_message
(
int
socket_fd
)
{
uint32_t
msg_length
=
0
;
if
(
recv
(
socket_fd
,
&
msg_length
,
sizeof
(
msg_length
),
0
)
<
0
)
int
ec
=
recv
(
socket_fd
,
&
msg_length
,
sizeof
(
msg_length
),
0
);
if
(
ec
<
0
)
{
FILE
*
ptr
=
fopen
(
"/home/pmix"
,
"a"
);
fprintf
(
ptr
,
"Failed Read EC: %d
\n
"
,
ec
);
fclose
(
ptr
);
return
NULL
;
}
msg_length
=
le32toh
(
msg_length
);
char
*
data
=
(
char
*
)
xmalloc
(
msg_length
);
if
(
recv
(
socket_fd
,
data
,
msg_length
,
0
)
<
0
)
{
char
*
data
=
(
char
*
)
malloc
(
msg_length
);
ec
=
recv
(
socket_fd
,
data
,
msg_length
,
0
);
if
(
ec
<
0
)
{
FILE
*
ptr
=
fopen
(
"/home/pmix"
,
"a"
);
fprintf
(
ptr
,
"Failed Read EC: %d
\n
"
,
ec
);
fclose
(
ptr
);
free
(
data
);
return
NULL
;
}
...
...
@@ -188,8 +198,9 @@ char *_cpy_message_data(char *json) {
len
++
;
len
++
;
char
*
result
=
(
char
*
)
x
malloc
(
len
);
char
*
result
=
(
char
*
)
malloc
(
len
);
strlcpy
(
result
,
start
,
len
);
result
[
len
-
1
]
=
'\0'
;
return
result
;
}
...
...
@@ -682,7 +693,7 @@ int collect_connected(const pmix_proc_t procs[], size_t proc_count,
const
size_t
max_proc_info_len
=
128
;
char
data
[
max_proc_info_len
];
memset
((
void
*
)
data
,
0
,
max_proc_info_len
);
snprintf
(
data
,
max_proc_info_len
,
"%s,%u"
,
(
const
char
*
)
procs
[
0
].
nspace
,
snprintf
(
data
,
sizeof
data
,
"%s,%u"
,
(
const
char
*
)
procs
[
0
].
nspace
,
procs
[
0
].
rank
);
msg_data
=
(
char
*
)
malloc
(
strlen
(
data
)
+
1
);
strcpy
(
msg_data
,
data
);
...
...
@@ -690,7 +701,7 @@ int collect_connected(const pmix_proc_t procs[], size_t proc_count,
// Append other procs
for
(
size_t
i
=
1
;
i
<
proc_count
;
i
++
)
{
memset
((
void
*
)
data
,
0
,
max_proc_info_len
);
snprintf
(
data
,
max_proc_info_len
,
",%s,%u"
,
(
const
char
*
)
procs
[
i
].
nspace
,
snprintf
(
data
,
sizeof
data
,
",%s,%u"
,
(
const
char
*
)
procs
[
i
].
nspace
,
procs
[
i
].
rank
);
char
*
old_data
=
msg_data
;
msg_data
=
(
char
*
)
malloc
(
strlen
(
data
)
+
strlen
(
old_data
)
+
1
);
...
...
@@ -829,11 +840,12 @@ char **_create_srun_argv(const pmix_app_t *app, int *_out_app_argc) {
return
argv
;
}
void
_populate_srun_argv
(
const
pmix_app_t
*
app
,
int
app_argc
,
char
**
argv
)
{
void
_populate_srun_argv
(
const
pmix_app_t
*
app
,
int
app_argc
,
char
**
argv
,
size_t
subtask_num
)
{
int
index
=
0
;
argv
[
index
++
]
=
"srun"
;
argv
[
index
++
]
=
"--mpi=pmix"
;
argv
[
index
++
]
=
"--output=/home/subtask.out"
;
xstrfmtcat
(
argv
[
index
++
]
,
"--output=/home/subtask
%zu
.out"
,
subtask_num
)
;
xstrfmtcat
(
argv
[
index
++
],
"--ntasks=%d"
,
app
->
maxprocs
);
for
(
size_t
i
=
0
;
i
<
app
->
ninfo
;
i
++
)
{
...
...
@@ -904,7 +916,7 @@ int _validate_launch(const pmix_proc_t *proc, const pmix_app_t apps[],
_destroy_message
(
msg
);
// Ignore namespace, rank
strtok
(
msg
,
","
);
strtok
(
msg
_data
,
","
);
strtok
(
NULL
,
","
);
// Check if we are allowed to proceed
...
...
@@ -920,8 +932,7 @@ int _validate_launch(const pmix_proc_t *proc, const pmix_app_t apps[],
char
*
count
;
uint32_t
totalCount
=
0
;
size_t
*
ids
=
(
size_t
*
)
malloc
(
0
);
while
((
count
=
strtok
(
NULL
,
","
))
!=
NULL
&&
count
[
0
]
!=
'\"'
)
{
// last check for start of "msg_type"
while
((
count
=
strtok
(
NULL
,
","
))
!=
NULL
)
{
uint32_t
numTasks
;
sscanf
(
count
,
"%u"
,
&
numTasks
);
...
...
@@ -941,7 +952,7 @@ void _launch_app(const pmix_proc_t *parent, const pmix_app_t *app,
size_t
dynamicId
)
{
int
app_argc
;
char
**
argv
=
_create_srun_argv
(
app
,
&
app_argc
);
_populate_srun_argv
(
app
,
app_argc
,
argv
);
_populate_srun_argv
(
app
,
app_argc
,
argv
,
dynamicId
);
size_t
index
=
0
;
FILE
*
ptr
=
fopen
(
"/home/pmix"
,
"a"
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment