median of median implementation

dato datuashvili picture dato datuashvili · Mar 13, 2012 · Viewed 12.7k times · Source

Here is pseudo code for implementation of median by dividing array into 5 groups

select(int A[],int first, int last, int i) {
    n = last - first + 1; /* n is the number elements to select from */
    if (i > n) {return ERROR;} /* there is no ith smallest element */
    if( n < = 100 ) {
        /********************* For Small n *********************/
        Run selection on A[first..last] taking at most n(n-1)/2 < 50n comparisons;
        swap A[first+i-1] with A[first] /* put ith smallest in A[first] */
    }
    else /* n > 100 */ {
        /********** main recursion *************************/
        numGroups = n / 5; /* integer division, round down */
        for group = 0 to numGroups-1 do {
            shift = group*5;
            /* A[first+shift] is the start of the group, A[first+shift+4] is end of group */
            find median of A[first+shift .. first+shift+4] and swap it into A[first + group];
        } /* for group */;
        lastMedian = first+numGroups-1;
        /* now the medians of the numGroups groups are all A[first .. lastMedian] */
        /****** the first recursive call to find median of medians ******/
        select(A, first, lastMedian, numGroups/2);
        /* now median of medians is in slot A[first] */
        /*********** partition array *********************/
        k = partition(A,first, last); /* See partition on page 146 of text */
        /* now k is the index where the median of median winds up, the smaller elements */
        /* will be in A[first..k-1] and larger elements will be in A[k+1..last] */
        /************ where is the ith smallest element? ********/
        if (k == first + i -1) {
            /* ith smallest is the median of medians in A[k] */
            swap A[k] and A[first] and return
        } else if (k > = first + i -1) {
            /* second recursion to find ith smallest among the "small" keys in A[first..k-1] */
            select(A, first, k-1, i);
        } else /* k < first + i -1 */ {
            /* second recursion to find the proper element among the "large" keys */
            numSmaller = k-first+1; /* the number of "smaller" keys not recursed on */
            newi = i - numSmaller;
            /* the ith smallest of A[first..last] is the newi smallest of A[k+1..last] */
            select(A, k+1, last, newi);
            /* ith smallest now in A[k+1], put it in A[first] */
            swap A[k+1] and A[first];
        } /* if k - second else */
    } /* if n - else part */
} /*select */

I have two questions:

  1. first one is related to partition code, here we are given only array and its bounds, no pivot element is indicated, so how this partition code should look? We should choose pivot index and pivot element as:

    int pivotindex=(end-begin)/2
    int pivot values=a[pivotindex];
    

    or it should be random choice?

  2. how to output selected median?

Generally language does not matter, but it would be great if example would be shown in C++.

Answer

totjammykd picture totjammykd · Feb 28, 2013

Explanation of the median - of - medians algorithm to find the k-th largest integer out of n can be found here: http://cs.indstate.edu/~spitla/presentation.pdf

Implementation in c++ is below:

#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;

int findMedian(vector<int> vec){
//    Find median of a vector
    int median;
    size_t size = vec.size();
    median = vec[(size/2)];
    return median;
}

int findMedianOfMedians(vector<vector<int> > values){
    vector<int> medians;

    for (int i = 0; i < values.size(); i++) {
        int m = findMedian(values[i]);
        medians.push_back(m);
    }

    return findMedian(medians);
}

void selectionByMedianOfMedians(const vector<int> values, int k){
//    Divide the list into n/5 lists of 5 elements each
    vector<vector<int> > vec2D;

    int count = 0;
    while (count != values.size()) {
        int countRow = 0;
        vector<int> row;

        while ((countRow < 5) && (count < values.size())) {
            row.push_back(values[count]);
            count++;
            countRow++;
        }
        vec2D.push_back(row);
    }

    cout<<endl<<endl<<"Printing 2D vector : "<<endl;
    for (int i = 0; i < vec2D.size(); i++) {
        for (int j = 0; j < vec2D[i].size(); j++) {
            cout<<vec2D[i][j]<<" ";
        }
        cout<<endl;
    }
    cout<<endl;

//    Calculating a new pivot for making splits
    int m = findMedianOfMedians(vec2D);
    cout<<"Median of medians is : "<<m<<endl;

//    Partition the list into unique elements larger than 'm' (call this sublist L1) and
//    those smaller them 'm' (call this sublist L2)
    vector<int> L1, L2;

    for (int i = 0; i < vec2D.size(); i++) {
        for (int j = 0; j < vec2D[i].size(); j++) {
            if (vec2D[i][j] > m) {
                L1.push_back(vec2D[i][j]);
            }else if (vec2D[i][j] < m){
                L2.push_back(vec2D[i][j]);
            }
        }
    }

//    Checking the splits as per the new pivot 'm'
    cout<<endl<<"Printing L1 : "<<endl;
    for (int i = 0; i < L1.size(); i++) {
        cout<<L1[i]<<" ";
    }

    cout<<endl<<endl<<"Printing L2 : "<<endl;
    for (int i = 0; i < L2.size(); i++) {
        cout<<L2[i]<<" ";
    }

//    Recursive calls
    if ((k - 1) == L1.size()) {
        cout<<endl<<endl<<"Answer :"<<m;
    }else if (k <= L1.size()) {
        return selectionByMedianOfMedians(L1, k);
    }else if (k > (L1.size() + 1)){
        return selectionByMedianOfMedians(L2, k-((int)L1.size())-1);
    }

}

int main()
{
    int values[] = {2, 3, 5, 4, 1, 12, 11, 13, 16, 7, 8, 6, 10, 9, 17, 15, 19, 20, 18, 23, 21, 22, 25, 24, 14};

    vector<int> vec(values, values + 25);

    cout<<"The given array is : "<<endl;
    for (int i = 0; i < vec.size(); i++) {
        cout<<vec[i]<<" ";
    }

    selectionByMedianOfMedians(vec, 8);

    return 0;
}