noise

計算機科学や各種設定のメモ

中央値を求めるアルゴリズム(決定的/乱択的比較)

説明

QuickSelectと呼ばれているO(n)時間アルゴリズムを利用する。

実装

module QuickSelect
  private
  def quick_select_0(arr, k, pivot_selector, &block)
    return nil if arr.length == 0
    return arr[0] if arr.length == 1
    return nil unless 0 <= k && k < arr.length

    pivot = pivot_selector.call(arr)
    left,right = [],[]

    arr.each do |e|
      t = (block_given? ? block.call(e, pivot) : (e <=> pivot))
      if t < 0
        left << e
      else
        right << e
      end
    end

    if k < left.length
      quick_select_0(left, k, pivot_selector, &block)
    else
      quick_select_0(right, k - left.length, pivot_selector, &block)
    end
  end

  def median_of_medians(arr, &block)
    acc = []
    offset = 0

    while offset < arr.length
      t = (offset + 5 > arr.length) ? arr[offset...arr.length] : arr[offset...offset+5]
      acc << t.sort(&block)[t.length/2]
      offset += 5
    end

    if acc.length == 1
      acc[0]
    else
      median_of_medians(acc, &block)
    end
  end

  public
  def quick_select_randomized(k, &block)
    quick_select_0(self, k, lambda{|s| s[Random.rand(s.length)] }, &block)
  end

  def quick_select_deterministic(k, &block)
    quick_select_0(self, k, lambda{|s| median_of_medians(s, &block) }, &block)
  end
end

if __FILE__ == $0
  class Array
    include QuickSelect
  end

  def time(message, &blk)
    start_time = Time.now
    blk.call
    puts [message,(Time.now - start_time).to_s].join(":")
  end

  max = 10_000_000
  k = max / 2
  arr = (0...max).to_a.shuffle

  time "deterministic" do
    arr.quick_select_deterministic(k){|a,b| b <=> a}
  end

  time "randomized" do
    arr.quick_select_randomized(k){|a,b| b <=> a}
  end

  time "sort" do
    arr.sort{|a,b| b <=> a}[k]
  end
end

実行

実行時間計測結果(単位:秒)

deterministic:14.266219713
randomized:8.128821934
sort:20.739770068

所感

決定的アルゴリズムにおけるPivot選択のコストが高いため乱択アルゴリズムを用いたほうが高速だった。