/******************************************************************************
One-sided Communication example:
In this example all processes expose a memory region large enough to
contain one integer for each rank in MPI_COMM_WORLD. The MPI-2 standard
defines three synchronization mechanisms:
- Lock/Unlock: One rank requests either a shared or an exclusive
lock on the window exposed by some target rank.
- Fence: The first call to MPI_Win_fence will open an epoch
where all ranks can access all other ranks. The epoch
is closed with another call to MPI_Win_fence.
- Active Target: In this synchronization mode, the targets of remote
memory accesses explicitly grant permission to the
origins through the call MPI_Win_post. An epoch is
closed through a call to MPI_Win_wait. Likewise, the
origins explicitly request permission through a call
to MPI_Win_start and close the epoch through
MPI_Win_complete. Note that every rank granted access
*must* call MPI_Win_start and eventually
MPI_Win_complete before an epoch is closed.
Recall that "origin" refers to the process that performs the remote memory
access call, while the "target" refers to the process in which memory is
accessed.
In this example we show how to use each of the synchronization mechanisms.
More specifically:
--------------------------------
- Every process exposes a window large enough to store one integer per
process in MPI_COMM_WORLD.
- Lock/Unlock: Every rank x accesses rank x+1's memory, and fills the
exposed region with integers of value x.
- Fence: Every process writes one integer (value = rank) to all
other processes.
- ActiveTarget: Every rank x accesses rank x-1's memory, and fills the
exposed region with integers of value x.
- Get: Every process reads the exposed memory region of every
process and displays the result.
- The window is freed and MPI_Finalize is called.
--------------------------------
Copyright 2003 (c) Critical Software SA
. http://www.criticalsoftware.com
. http://www.criticalsoftware.com/hpc
. csWMPI II@criticalsoftware.com
*****************************************************************************/
#include <stdio.h>
#include <malloc.h>
#include <mpi.h>
/*
We use a global variable for the MPI_Win used in all the synchronization
examples. The window is created in main() with MPI_Win_create over
MPI_COMM_WORLD and released there with MPI_Win_free; every function in
this file that performs one-sided communication refers to it.
Note: If you mix different types of synchronization:
YOU WILL EXPLICITLY HAVE TO SYNCHRONIZE ALL
PROCESSES IN THE GROUP OF THE WINDOW
- either by MPI_Barrier (as done in this example) or by some other method.
*/
MPI_Win g_mwWin;
/******************************************************************************
Prints a sequence of integers on a single line of standard output.
  pn_ints - first of the integers to print
  n_count - how many integers to print; nothing but the newline is printed
            when this is zero or negative
Each value is written as "(value)" with no separator, and the line is
terminated with a newline.
*****************************************************************************/
void DisplayInts(int* pn_ints, int n_count) {
    int* pnCursor = pn_ints;
    int nRemaining = n_count;

    /* Walk the array with a pointer instead of an index. */
    while (nRemaining > 0) {
        printf("(%d)", *pnCursor);
        pnCursor++;
        nRemaining--;
    }
    printf("\n");
}
/******************************************************************************
Passive-target example: epochs are opened and closed with MPI_Win_lock and
MPI_Win_unlock.
  n_my_rank   - rank of the calling process; also the value that is written
  n_comm_size - number of integers written into the target's window
The caller writes its own rank into every integer slot of the window
exposed by rank (n_my_rank + 1) modulo n_comm_size, then waits in a barrier
on MPI_COMM_WORLD.
*****************************************************************************/
void LockUnlock(int n_my_rank, int n_comm_size) {
    /* Our target is the next rank, wrapping around at the end. */
    int nNeighbour = (1 + n_my_rank) % n_comm_size;
    int nSlot = 0;
    int nByteOffset;

    /* We are going to write, so ask for an exclusive lock on the target's */
    /* window.                                                             */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, nNeighbour, 0, g_mwWin);

    /* One Put per integer. A single Put of n_comm_size integers would be  */
    /* far more efficient, but the goal here is to illustrate one-sided    */
    /* communication rather than to be fast.                               */
    while (nSlot < n_comm_size) {
        /* The window is created with a displacement unit of 1 [see        */
        /* main()], so target displacements are byte offsets. Had the      */
        /* window used a displacement unit of sizeof(int), the slot index  */
        /* itself could have been used.                                    */
        nByteOffset = nSlot * sizeof(int);

        /* Write one integer (our rank) into the current slot: */
        MPI_Put(&n_my_rank, 1, MPI_INT,
                nNeighbour, nByteOffset, 1, MPI_INT,
                g_mwWin);
        nSlot++;
    }

    /* Release the lock; this ends the access epoch. */
    MPI_Win_unlock(nNeighbour, g_mwWin);

    /* Other synchronization methods are used on the same window later, so */
    /* all processes are synchronized before returning.                    */
    MPI_Barrier(MPI_COMM_WORLD);
}
/******************************************************************************
Fence example: epochs are opened and closed with MPI_Win_fence.
  n_my_rank   - rank of the calling process; also the value that is written
  n_comm_size - number of target ranks (0 .. n_comm_size-1) written to
The caller writes its own rank into slot number n_my_rank of the window of
every rank, then waits in a barrier on MPI_COMM_WORLD.
*****************************************************************************/
void Fence(int n_my_rank, int n_comm_size) {
    int nOwnSlotOffset;
    int nTarget = 0;

    /* First fence: opens the epoch. */
    MPI_Win_fence(0, g_mwWin);

    /* Byte offset of "our" slot; it is the same in every target's window. */
    nOwnSlotOffset = n_my_rank * sizeof(int);

    while (nTarget < n_comm_size) {
        /* Store our rank in the memory exposed by rank nTarget: */
        MPI_Put(&n_my_rank, 1, MPI_INT,
                nTarget, nOwnSlotOffset, 1, MPI_INT,
                g_mwWin);
        nTarget++;
    }

    /* Second fence: closes the epoch. */
    MPI_Win_fence(0, g_mwWin);

    /* Other synchronization methods are used on the same window later, so */
    /* all processes are synchronized before returning.                    */
    MPI_Barrier(MPI_COMM_WORLD);
}
/******************************************************************************
Active-target example: epochs are opened and closed with MPI_Win_post /
MPI_Win_wait (exposure side) and MPI_Win_start / MPI_Win_complete (access
side).
  n_my_rank   - rank of the calling process; also the value that is written
  n_comm_size - size of MPI_COMM_WORLD and number of integers written
A process with rank x exposes its window to rank x+1 and itself writes its
rank into every integer slot of the window of rank x-1 (both modulo
n_comm_size). It then waits in a barrier on MPI_COMM_WORLD.
*****************************************************************************/
void ActiveTarget(int n_my_rank, int n_comm_size) {
    MPI_Group mgWorld;
    MPI_Group mgExposeTo;
    MPI_Group mgAccess;
    /* Rank that is allowed to access our memory: */
    int nAccessor = (n_my_rank + 1) % n_comm_size;
    /* Rank whose memory we are going to access: */
    int nTarget = ((n_my_rank - 1) + n_comm_size) % n_comm_size;
    int nSlot = 0;
    int nByteOffset;

    /* Build single-member groups for the post and start calls out of the  */
    /* group of MPI_COMM_WORLD.                                            */
    MPI_Comm_group(MPI_COMM_WORLD, &mgWorld);
    MPI_Group_incl(mgWorld, 1, &nAccessor, &mgExposeTo);
    MPI_Group_incl(mgWorld, 1, &nTarget, &mgAccess);

    /* Open the exposure epoch first, then the access epoch: */
    MPI_Win_post(mgExposeTo, 0, g_mwWin);
    MPI_Win_start(mgAccess, 0, g_mwWin);

    /* Fill every slot of the target's window with our rank, one integer   */
    /* per Put; displacements are byte offsets.                            */
    while (nSlot < n_comm_size) {
        nByteOffset = nSlot * sizeof(int);
        MPI_Put(&n_my_rank, 1, MPI_INT,
                nTarget, nByteOffset, 1, MPI_INT,
                g_mwWin);
        nSlot++;
    }

    /* Close the access epoch, then the exposure epoch: */
    MPI_Win_complete(g_mwWin);
    MPI_Win_wait(g_mwWin);

    /* The groups are no longer needed: */
    MPI_Group_free(&mgAccess);
    MPI_Group_free(&mgExposeTo);
    MPI_Group_free(&mgWorld);

    /* Other synchronization methods are used on the same window later, so */
    /* all processes are synchronized before returning.                    */
    MPI_Barrier(MPI_COMM_WORLD);
}
/******************************************************************************
Reads (using MPI_Get) the content of the memory region exposed by every
rank 0 .. n_comm_size-1 and displays it.
  n_comm_size - number of ranks to read from, and number of integers read
                from each of them
For every rank one line is printed: a "Got the following from rank ..."
prefix followed by the integers, formatted by DisplayInts.
If n_comm_size is not positive there is nothing to read and the function
returns immediately. If the temporary buffer cannot be allocated, a message
is written to stderr and the function returns without reading anything.
*****************************************************************************/
void Get(int n_comm_size) {
    int* pnTempBuffer;
    int nCounter;

    if (n_comm_size <= 0) {
        return;
    }

    /* Allocate a buffer large enough to hold the content of one process'  */
    /* exposed region.                                                     */
    pnTempBuffer = malloc((size_t) n_comm_size * sizeof *pnTempBuffer);
    if (pnTempBuffer == NULL) {
        /* Only lock/unlock epochs are used below and no collective call   */
        /* is made, so returning early on this rank alone is possible.     */
        fprintf(stderr,
                "Get: could not allocate a buffer for %d integers\n",
                n_comm_size);
        return;
    }

    /* Read the content of the exposed regions one by one. Shared locks    */
    /* are sufficient since we only read.                                  */
    for (nCounter = 0; nCounter < n_comm_size; nCounter++) {
        /* Open an epoch: */
        MPI_Win_lock(MPI_LOCK_SHARED, nCounter, 0, g_mwWin);

        /* Fetch the whole region (n_comm_size integers from offset 0): */
        MPI_Get(pnTempBuffer,
                n_comm_size,
                MPI_INT,
                nCounter,
                0,
                n_comm_size,
                MPI_INT,
                g_mwWin);

        /* Close the epoch. This has to happen before pnTempBuffer is      */
        /* accessed, to make sure that the Get operation has completed.    */
        MPI_Win_unlock(nCounter, g_mwWin);

        /* Display the result: */
        printf("Got the following from rank %d: ", nCounter);
        DisplayInts(pnTempBuffer, n_comm_size);
    }

    free(pnTempBuffer);
}
int main(int argc, char ** argv) {
int nCommRank;
int nCommSize;
int nWindowSize;
int* pchSharedMemory = NULL;
/* Initialize csWMPI II II: */
MPI_Init(&argc, &argv);
/* Determine what the world looks like and our own position in it: */
MPI_Comm_size(MPI_COMM_WORLD, &nCommSize);
MPI_Comm_rank(MPI_COMM_WORLD, &nCommRank);
printf("I am rank %d of %d in MPI_COMM_WORLD\n", nCommRank, nCommSize);
fflush(stdout);
/* Compute the size of the memory region to expose and allocate memory: */
nWindowSize = nCommSize * sizeof(int);
pchSharedMemory = (int*) malloc(nWindowSize);
/* Create the window (same size for all processes. Note that we use a */
/* displacement of 1. This means that displacements in Puts and Gets */
/* have to be calculated as byte offsets. */
MPI_Win_create(pchSharedMemory,
nWindowSize,
1,
MPI_INFO_NULL,
MPI_COMM_WORLD,
&g_mwWin);
/* Use the lock/unlock synchronization: */
LockUnlock(nCommRank, nCommSize);
printf("After LockUnlock - My memory region contains: ");
DisplayInts(pchSharedMemory, nCommSize);
fflush(stdout);
/* Use the fence synchronization: */
Fence(nCommRank, nCommSize);
printf("After Fence - My memory region contains: ");
DisplayInts(pchSharedMemory, nCommSize);
fflush(stdout);
/* Use the active target (post,wait,start,complete) synchronization: */
ActiveTarget(nCommRank, nCommSize);
printf("After PostWaitStartComplete - My memory region contains: ");
DisplayInts(pchSharedMemory, nCommSize);
fflush(stdout);
/* Use get to read the content of the exposed memory regions on all */
/* processes: */
printf("\nGetting content of all ranks exposed memory regions: \n");
Get(nCommSize);
fflush(stdout);
/* Free the window and the allocated memory: */
MPI_Win_free(&g_mwWin);
free(pchSharedMemory);
MPI_Finalize ();
/* Pause rank 0 so that the output can be verified: */
if (nCommRank == 0) {
printf("\nPress ENTER to exit...\n");
fflush(stdout);
getchar();
}
return 0;
}
|