#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

/* Benchmark parameters and the message tags used for each direction. */
enum {
  MAX_LOG2_COUNT = 27, /* largest message is 2^27 doubles */
  WARMUP_ROUNDS  = 5,  /* untimed round trips run before each measurement */
  TIMED_ROUNDS   = 50, /* round trips included in each measurement */
  TAG_PING       = 10, /* rank 0 -> rank 1 */
  TAG_PONG       = 20  /* rank 1 -> rank 0 */
};

/*
 * Performs `rounds` round trips of `count` doubles between ranks 0 and 1.
 *
 * Rank 0 sends first and then receives; rank 1 receives first and then sends
 * back, so the blocking calls always pair up. Any other rank returns without
 * communicating. The same buffer is used for both directions.
 */
static void ping_pong(double *buf, int count, int rank, int rounds) {
  MPI_Status status;

  for (int r = 0; r < rounds; ++r) {
    if (rank == 0) {
      MPI_Send(buf, count, MPI_DOUBLE, 1, TAG_PING, MPI_COMM_WORLD);
      MPI_Recv(buf, count, MPI_DOUBLE, 1, TAG_PONG, MPI_COMM_WORLD, &status);
    } else if (rank == 1) {
      MPI_Recv(buf, count, MPI_DOUBLE, 0, TAG_PING, MPI_COMM_WORLD, &status);
      MPI_Send(buf, count, MPI_DOUBLE, 0, TAG_PONG, MPI_COMM_WORLD);
    }
  }
}

/*
 * Point-to-point ping-pong benchmark.
 *
 * For message sizes of 2^0 .. 2^MAX_LOG2_COUNT doubles, ranks 0 and 1 bounce
 * a buffer back and forth. After WARMUP_ROUNDS untimed round trips,
 * TIMED_ROUNDS round trips are timed with MPI_Wtime, and rank 0 prints the
 * average one-way transfer time and the resulting bandwidth in GB/s
 * (1 GB = 2^30 bytes).
 *
 * Returns 0 on success and EXIT_FAILURE when fewer than two processes were
 * launched or a buffer could not be allocated.
 */
int main(int argc, char* argv[]) {
  MPI_Init(&argc, &argv);

  int size, rank;
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  /* The exchange addresses rank 1 explicitly; with a single process that
     destination does not exist. Every rank sees the same size, so all of
     them take this exit together and a clean MPI_Finalize is safe. */
  if (size < 2) {
    if (rank == 0)
      fprintf(stderr, "This benchmark requires at least 2 MPI processes.\n");
    MPI_Finalize();
    return EXIT_FAILURE;
  }

  const double bytes_per_gbyte = (double)(1L << 30);

  for (int exp = 0; exp <= MAX_LOG2_COUNT; exp++) {
    /* At most 2^27 elements, so the count fits the int expected by MPI. */
    const int count = 1 << exp;

    double *A = malloc((size_t)count * sizeof *A);
    if (A == NULL) {
      fprintf(stderr, "Rank %d: failed to allocate %d doubles.\n", rank, count);
      /* Only this rank knows about the failure; abort the whole job so the
         peer is not left blocked in a send or receive. */
      MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
      return EXIT_FAILURE;
    }

    for (int k = 0; k < count; k++) A[k] = 0.0;

    /* Untimed exchanges so one-time setup costs stay out of the measurement. */
    ping_pong(A, count, rank, WARMUP_ROUNDS);

    double elapsed_time = -MPI_Wtime();
    ping_pong(A, count, rank, TIMED_ROUNDS);
    elapsed_time += MPI_Wtime();

    long int num_bytes = (long int)count * (long int)sizeof(double);
    double num_gbytes = (double)num_bytes / bytes_per_gbyte;
    /* Each round trip consists of two one-way transfers. */
    double avg_time_per_transfer = elapsed_time / (2.0 * TIMED_ROUNDS);

    if (rank == 0)
      printf("Transfer size (B): %10li, Transfer Time (s): %15.9f, "
             "Bandwidth (GB/s): %15.9f\n",
             num_bytes, avg_time_per_transfer,
             num_gbytes / avg_time_per_transfer);

    free(A);
  }

  MPI_Finalize();

  return 0;
}