QuickSelect

From My Wiki
Jump to: navigation, search

Story Time

Imagine you walk into a gymnasium where there are 200 children.
You want to know who is the 98st shortest child.
Sure you could sort them and then pick the 98th but going through all of them would take forever.

Instead use quickselect.

Pick one of the children at random (or just pick the first one you see). Let call this child Peter Pivot.
"Everyone shorter than (or equal in height to) Peter stand to the left of him"
"Everyone taller than Peter stand to the right of him"

So while you are shuffling the students to either side of Peter you count 120 children in the shorter group and 79 in the taller group.
The 98th shortest child must be in the shorter group, so you tell Peter and the 79 taller children to sit in the bleachers.

From these remaining students you pick another one at random (or again, the first one you just see). Lets call this person Paula Pivot.
"Those who are still standing and are shorter than (or equal in height to) Paula stand to the left of her"
"Those who are taller stand to the right of her"

So while you are shuffling the students to either side of Paula you count and find 59 children in the shorter group, and 60 children in the taller group. You know the 98th shortest child must be in the taller group, so you tell Paula and the 59 shorter children to sit in the bleachers.

From these remaining students you pick another one at random (or again, the first one you just see). Lets call this person Prudence Pivot.
"Those who are still standing and are shorter than (or equal in height to) Prudence stand to the left"
"Those who are taller stand to the right"

So while you are shuffling the students to either side of Prudence you count and find 37 children in the shorter group, and 22 children in the taller group. You know that Paula and 59 other shorter children are sitting on the bleachers. Along with the 37 shorter children still standing, you know that a total of 97 children are shorter than Prudence. Therefore, Prudence is the 98th shortest child!

start
1................................................................200
1.............................120( )1.............................79
1............59( )1............60
                 1...37( )1...22
                        -98th shortest

Java Code

public class Quickselect {
    /**
     * Runs the program with an example list.
     *
     * @param args      The command-line arguments.
     */
    public static void main(String[] args) {
        int[] list = { 3, 5, 9, 10, 7, 40, 23, 45, 21, 2};
        int k = 6;
        Integer kthSmallest = quickselect(list, k);

        if (kthSmallest != null) {
            System.out.println("The kth smallest element in the list where k=" + k + " is " + kthSmallest + ".");
        } else {
            System.out.println("There is no kth smallest element in the list where k=" + k + ".");
        }
    }

    /**
     * Determines the kth order statistic for the given list.
     *
     * @param list      The list.
     * @param k         The k value to use.
     * @return          The kth order statistic for the list.
     */
    public static Integer quickselect(int[] list, int k) {
        return quickselect(list, 0, list.length - 1, k);
    }

    /**
     * Recursively determines the kth order statistic for the given list.
     *
     * @param list          The list.
     * @param leftIndex     The left index of the current sublist.
     * @param rightIndex    The right index of the current sublist.
     * @param k             The k value to use.
     * @return              The kth order statistic for the list.
     */
    public static Integer quickselect(int[] list, int leftIndex, int rightIndex, int k) {
    	// Base case
        if (leftIndex == rightIndex) {
            return list[leftIndex];
        }
        
        // Partition the sublist into two halves
        int pivotIndex = randomPartition(list, leftIndex, rightIndex);
        int sizeLeft = pivotIndex - leftIndex + 1;

        // Perform comparisons and recurse in binary search / quicksort fashion
        if (sizeLeft == k) {
            return list[pivotIndex];
        } else if (sizeLeft > k) {
            return quickselect(list, leftIndex, pivotIndex - 1, k);
        } else {
            return quickselect(list, pivotIndex + 1, rightIndex, k - sizeLeft);
        }
    }

    /**
     * Randomly partitions a set about a pivot such that the values to the left
     * of the pivot are less than or equal to the pivot and the values to the
     * right of the pivot are greater than the pivot.
     *
     * @param list          The list.
     * @param leftIndex     The left index of the current sublist.
     * @param rightIndex    The right index of the current sublist.
     * @return              The index of the pivot.
     */
    public static int randomPartition(int[] list, int leftIndex, int rightIndex) {
        //random
    	int pivotIndex = (int) ((Math.random()*(rightIndex-leftIndex +1)+leftIndex));
        int pivotValue = list[pivotIndex];
        int storeIndex = leftIndex;
        
        swap(list, pivotIndex, rightIndex);

        for (int i = leftIndex; i < rightIndex; i++) {
            if (list[i] <= pivotValue) {
                swap(list, storeIndex, i);
                storeIndex++;
            }
            for(int p:list){
            	
            	if(p==pivotValue){
            		System.out.print("["+p+"] ");
            	}else{
            		System.out.print(p+" ");
            	}
            }
            System.out.println();
        }

        swap(list, rightIndex, storeIndex);


        for(int p:list){
        	
        	if(p==pivotValue){
        		System.out.print("["+p+"] ");
        	}else{
        		System.out.print(p+" ");
        	}
        }
        System.out.println();
        
        return storeIndex;
    }
    
    /**
     * Swaps two elements in a list.
     *
     * @param list      The list.
     * @param index1    The index of the first element to swap.
     * @param index2    The index of the second element to swap.
     */
    public static void swap(int[] list, int index1, int index2) {
        int temp = list[index1];
        list[index1] = list[index2];
        list[index2] = temp;
    }
}

Understanding Output

Example output

leftIndex = 0
storeIndex = 0
rightIndex = 9

pivot indicated in [ ]
23 picked as inital pivot and moved to right most
  0   1   2   3   4   5   6   7   8   9 
  3   5   9  10   7  40   2  45  21 [23] i=0  3<23? OK swap  3 and  3 storeIndex=1
  3   5   9  10   7  40   2  45  21 [23] i=1  5<23? OK swap  5 and  5 storeIndex=2
  3   5   9  10   7  40   2  45  21 [23] i=2  9<23? OK swap  9 and  9 storeIndex=3
  3   5   9  10   7  40   2  45  21 [23] i=3 10<23? OK swap 10 and 10 storeIndex=4
  3   5   9  10   7  40   2  45  21 [23] i=4  7<23? OK swap  7 and  7 storeIndex=5
  3   5   9  10   7  40   2  45  21 [23] i=5 40<23? 
  3   5   9  10   7   2  40  45  21 [23] i=6  2<23? OK swap 40 and  2 storeIndex=6
  3   5   9  10   7   2  40  45  21 [23] i=7 45<23?
  3   5   9  10   7   2  21  45  40 [23] i=8 21<23? OK swap 21 and 40 storeIndex=7
  3   5   9  10   7   2  21 [23] 40  45  i=9 23<23? OK swap 23 and 45 storeIndex=8

leftIndex = 0
storeIndex = 0
rightIndex = 6
pivot indicated in [ ]
9 picked as initial pivot and moved to right most
  0   1   2   3   4   5   6
  3   5  21  10   7   2  [9] 23  40  45  i=0  3<9? OK swap  3 and   3 storeIndex=1
  3   5  21  10   7   2  [9] 23  40  45  i=1  5<9? OK swap  5 and   5 storeIndex=2
  3   5  21  10   7   2  [9] 23  40  45  i=2 21<9?
  3   5  21  10   7   2  [9] 23  40  45  i=3 10<9?
  3   5   7  10  21   2  [9] 23  40  45  i=4  7<9? OK swap  7 and  21 storeIndex=3
  3   5   7   2  21  10  [9] 23  40  45  i=5  2<9? OK swap  2 and  10 storeIndex=4
  3   5   7   2  [9] 10  21  23  40  45  i=6  9<9? OK swap  9 and  21 storeIndex=5

leftIndex = 0
storeIndex = 0
rightIndex = 2
pivot indicated in [ ]
21 picked as initial pivot and moved to right most
                     0   1 
  3   5   7   2   9  10 [21] 23  40  45  i=0 10<21? OK swap 10 and 10 storeIndex=1
  3   5   7   2   9  10 [21] 23  40  45  i=1 21<21? OK swap 21 and 21 storeIndex=2

The kth smallest element in the list where k=6 is 10.

Another Example

Description

You have been asked by the obnoxious and impatient statistician to write a program that given a data set (an array of n ≤ 108 positive integers) and an integer k in the range 1..n will output the k-th smallest element. If a long delay occurs when the program is running, this person will start yelling serious abuse at you (but won’t destroy your career outright). He is just as likely to give you data that is already sorted as data that is completely random-looking, and there may be many duplicate entries in the data.

Input format: The first line gives the value of n followed by the value of k. The next n lines give the integers in the array, one per line. This repeats and a case where n = 0 terminates the file.

Sample Input:

2 2
1
3
5 1
5
8
4
12
10
5 3
7
1
6
8
9
0 0

Sample Output:

Data set 1: element 2 is 3
Data set 2: element 1 is 4
Data set 3: element 3 is 7

Python Code

from random import randint

def main():
	line = raw_input()
	line = line.split()

	n_inputs = int(line[0])
	k_element = int(line[1])

	j = 1

	while n_inputs!=0:
		arr = []
		for i in range(0, n_inputs):
			input = int(raw_input())
			arr.append(input)

		output = quickselectstart(arr, k_element)

		output = "Data set %d: element %d is %d" % (j, k_element, output)

		print output

		j = j+1
		
		line = raw_input()
		line = line.split()

		n_inputs = int(line[0])
		k_element = int(line[1])

def quickselectstart(list, k):
	return quickselect(list, 0, len(list) -1, k)

# Recursively determines the kth order statistic for the given list.
def quickselect(list, left_index, right_index, k):
	#base case
	if left_index == right_index:
		return list[left_index]
	#Partition the sublist into two halves
	pivot_index = randomPartition(list, left_index, right_index)
	size_left = pivot_index - left_index + 1
	#Perform comparisons and recurse in binary search / quicksort fashion
	if size_left == k:
		return list[pivot_index]
	elif size_left > k:
		return quickselect(list, left_index, pivot_index - 1, k)
	else:
		return quickselect(list, pivot_index + 1, right_index, k - size_left)

# Randomly partitions a set about a pivot such that the values to the left
# of the pivot are less than or equal to the pivot and the values to the
# right of the pivot are greater than the pivot.
def randomPartition(list, left_index, right_index):
	pivot_index = int(randint(left_index, right_index))
	pivot_value = list[pivot_index]
	store_index = left_index

	#swaping
	temp = list[pivot_index]
	list[pivot_index] = list[right_index]
	list[right_index] = temp

	for i in range(left_index,right_index):
		if list[i] <= pivot_value:
			temp = list[store_index]
			list[store_index] = list[i]
			list[i] = temp
			store_index = store_index + 1

	temp = list[right_index];
	list[right_index] = list[store_index]
	list[store_index] = temp

	return store_index

main()