/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2020 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2010-2012 Oracle and/or its affiliates.  All rights reserved.
 * Copyright (c) 2015-2021 Research Organization for Information Science
 *                         and Technology (RIST).  All rights reserved.
 * Copyright (c) 2017      IBM Corporation.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "ompi_config.h"

#include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/params.h"
#include "ompi/communicator/communicator.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/datatype/ompi_datatype.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/proc/proc.h"
#include "ompi/memchecker.h"
#include "ompi/runtime/ompi_spc.h"

#if OMPI_BUILD_MPI_PROFILING
#if OPAL_HAVE_WEAK_SYMBOLS
#pragma weak MPI_Sendrecv_replace = PMPI_Sendrecv_replace
#endif
#define MPI_Sendrecv_replace PMPI_Sendrecv_replace
#endif

static const char FUNC_NAME[] = "MPI_Sendrecv_replace";


int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
                         int dest, int sendtag, int source, int recvtag,
                         MPI_Comm comm, MPI_Status *status)
{
    ompi_request_t* req;
    int rc = MPI_SUCCESS;
#if OPAL_ENABLE_FT_MPI
    int rcs = MPI_SUCCESS;
#endif

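    /* bump the software-based performance counter (SPC) for this entry point */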
    SPC_RECORD(OMPI_SPC_SENDRECV_REPLACE, 1);

    MEMCHECKER(
               memchecker_datatype(datatype);
               memchecker_call(&opal_memchecker_base_isdefined, buf, count, datatype);
               memchecker_comm(comm);
               );

    if ( MPI_PARAM_CHECK ) {
        rc = MPI_SUCCESS;
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        OMPI_CHECK_DATATYPE_FOR_RECV(rc, datatype, count);

        if (ompi_comm_invalid(comm)) {
            return OMPI_ERRHANDLER_NOHANDLE_INVOKE(MPI_ERR_COMM, FUNC_NAME);
        } else if (dest != MPI_PROC_NULL && ompi_comm_peer_invalid(comm, dest)) {
            rc = MPI_ERR_RANK;
        } else if (sendtag < 0 || sendtag > mca_pml.pml_max_tag) {
            rc = MPI_ERR_TAG;
        } else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE && ompi_comm_peer_invalid(comm, source)) {
            rc = MPI_ERR_RANK;
        } else if (((recvtag < 0) && (recvtag !=  MPI_ANY_TAG)) || (recvtag > mca_pml.pml_max_tag)) {
            rc = MPI_ERR_TAG;
        }
        OMPI_ERRHANDLER_CHECK(rc, comm, rc, FUNC_NAME);
    }

#if OPAL_ENABLE_FT_MPI
    /*
     * The underlying Sendrecv path checks for process failures internally,
     * so there is no need to check here.
     */
#endif /* OPAL_ENABLE_FT_MPI */

    /* trivial cases: a MPI_PROC_NULL peer on either side, or nothing to
     * exchange; delegate directly to Sendrecv */
    if ( source == MPI_PROC_NULL || dest == MPI_PROC_NULL || count == 0 ) {
        rc = PMPI_Sendrecv(buf, count, datatype, dest, sendtag, buf, count, datatype, source, recvtag, comm, status);

        return rc;
    }

    /**
     * An optimal solution would receive the data into a temporary buffer
     * and, once the send completes, unpack it back into the original buffer.
     * However, if the sender is unknown, that approach can only be implemented
     * by receiving with the recv datatype (potentially non-contiguous), and
     * thus the allocated memory could be larger than the size of the data.
     * A simpler, but potentially less efficient, approach is to work on the
     * data we control, i.e. the sent data, and pack it into a contiguous
     * buffer before posting the receive. Once the send completes, we free it.
     */
    opal_convertor_t convertor;
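    /* stack buffer for small payloads; larger payloads fall back to
     * PMPI_Alloc_mem below */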
    unsigned char packed_data[2048];
    struct iovec iov = { .iov_base = packed_data, .iov_len = sizeof(packed_data) };
    size_t packed_size, max_data;
    uint32_t iov_count;
    ompi_proc_t* proc = ompi_comm_peer_lookup(comm, dest);
    if(proc == NULL) {
        rc = MPI_ERR_RANK;
        OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
    }

    /* initialize a convertor to pack the send buffer */
    OBJ_CONSTRUCT(&convertor, opal_convertor_t);
    opal_convertor_copy_and_prepare_for_send( proc->super.proc_convertor, &(datatype->super),
                                              count, buf, 0, &convertor );

    /* setup a temporary buffer to send */
    opal_convertor_get_packed_size( &convertor, &packed_size );
    if( packed_size > sizeof(packed_data) ) {
        rc = PMPI_Alloc_mem(packed_size, MPI_INFO_NULL, &iov.iov_base);
        if(OMPI_SUCCESS != rc) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            /* nothing was allocated: point back at the stack buffer so the
             * cleanup path does not try to free it */
            iov.iov_base = packed_data;
            goto cleanup_and_return;
        }
        iov.iov_len = packed_size;
    }
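
    /* pack the whole message; the staging buffer is sized from packed_size,
     * so the pack completes in a single pass (the return value is a
     * completion flag, not an MPI error code) */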
    max_data = packed_size;
    iov_count = 1;
    rc = opal_convertor_pack(&convertor, &iov, &iov_count, &max_data);

    /* post the receive directly into the user buffer: its previous contents
     * are already preserved in the staging buffer */
    rc = MCA_PML_CALL(irecv(buf, count, datatype,
                            source, recvtag, comm, &req));
    if(OMPI_SUCCESS != rc) {
        goto cleanup_and_return;
    }

    /* send from the temporary buffer */
    rc = MCA_PML_CALL(send(iov.iov_base, packed_size, MPI_PACKED, dest,
                           sendtag, MCA_PML_BASE_SEND_STANDARD, comm));
#if OPAL_ENABLE_FT_MPI
    /* If ULFM is enabled we need to wait for the posted receive to
     * complete, hence we cannot return here */
    rcs = rc;
#else
    if(OMPI_SUCCESS != rc) {
        goto cleanup_and_return;
    }
#endif  /* OPAL_ENABLE_FT_MPI */

    rc = ompi_request_wait(&req, status);
#if OPAL_ENABLE_FT_MPI
    /* Sendrecv_replace never returns ERR_PROC_FAILED_PENDING because it is
     * blocking. Complete the irecv now and promote the error
     * to ERR_PROC_FAILED. */
    if( OPAL_UNLIKELY(MPI_ERR_PROC_FAILED_PENDING == rc) ) {
        ompi_request_cancel(req);
        ompi_request_wait(&req, MPI_STATUS_IGNORE);
        rc = MPI_ERR_PROC_FAILED;
    }
#endif

#if OPAL_ENABLE_FT_MPI
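    /* if the send failed but the receive succeeded, report the send error */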
    if( OPAL_UNLIKELY(MPI_SUCCESS != rcs && MPI_SUCCESS == rc) ) {
        rc = rcs;
    }
#endif

 cleanup_and_return:

    /* release the staging buffer if it was heap-allocated */
    if(iov.iov_base != packed_data) {
        PMPI_Free_mem(iov.iov_base);
    }
    OBJ_DESTRUCT(&convertor);

    OMPI_ERRHANDLER_RETURN(rc, comm, rc, FUNC_NAME);
}
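
/*
 * Minimal usage sketch (illustrative only, not compiled as part of this
 * file): assumes at least two ranks in MPI_COMM_WORLD; `rank` and `peer`
 * are hypothetical names.
 *
 *   int val = rank;              // payload, exchanged in place
 *   int peer = (rank + 1) % 2;   // partner rank
 *   MPI_Sendrecv_replace(&val, 1, MPI_INT,
 *                        peer, 0,      // dest, sendtag
 *                        peer, 0,      // source, recvtag
 *                        MPI_COMM_WORLD, MPI_STATUS_IGNORE);
 *   // on return, val holds the value the peer sent
 */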