Frank Fischer
5/20/2009 7:26:00 AM
On 2009-05-18, Frank Fischer <frank-fischer@shadow-soft.de> wrote:
Here's a second version, this time returning the correct number of books
(the first version returned the requested number of books + 1). And it
should be a little cleaner and faster.
Frank
--
require 'facets/enumerable'
require 'enumerator'
require 'pp'
word_file = ARGV.shift || "words.txt"
nbooks = (ARGV.shift || 10).to_i
algorithm = (ARGV.shift || 4).to_i
# ruby 1.8
if RUBY_VERSION[0,4] == "1.8."
class Integer
def ord; self; end
end
end
# A book containing the words starting with a letter in first .. last
class Book
attr_reader :first, :last, :n_words, :words
# Create a book with n words whose first letter is in range.
def initialize( range, n_words )
@range = range.min.ord .. range.max.ord
@n_words = n_words
end
# Set the words in this book
def words=( words )
@words = words.find_all{|w| include?(w.downcase[0])}
@n_words = @words.size
end
# Returns true if the words starting with the given letter are in
# this book
def include?( letter )
@range.include?( letter.ord - ?a.ord )
end
# Returns the character-range.
def range
if @range.min == @range.max
(?A.ord + @range.min).chr
else
(?A.ord + @range.min).chr + "-" + (?A.ord + @range.max).chr
end
end
def to_s; "<Book: %3s, size: #@n_words>" % range; end
def inspect; to_s; end
end
class Wordlist
attr_reader :books
def initialize( words )
# sort them
@words = words.sort {|w1, w2| w1.downcase <=> w2.downcase }
# get number of words per letter
@nletters = Array.new( 26, 0 )
@words.each do |w|
@nletters[w.downcase[0].ord - ?a.ord] += 1
end
end
# Returns the number of words per character
def letter_counts
(0...@nletters.size).mash{|i| [(?a.ord + i).chr, @nletters[i]]}
end
# divide into +nbooks+ books by minimizing the maximal number
# of words in one book
def min_maximum( nbooks )
dyn_min( nbooks ) { |s, rest| [s, rest || 0].max }
end
# divide into +nbooks+ books by maximizing the minmal number
# of words in one book
def max_minimum( nbooks )
dyn_min( nbooks ) { |s, rest| [-s, (rest || -s)].max }
end
# divide into +nbooks+ books by minimizing the deviation from the
# average number of words in one book
def min_deviat( nbooks )
mean = sum( 0 ... @nletters.size ).to_f / nbooks
dyn_min( nbooks ) { |s, rest| (rest || 0) + (s - mean).abs }
end
# divide into +nbooks+ books by minimizing the quadratic deviation
# from the average number of words in one book
def min_deviat2( nbooks )
mean = sum( 0 ... @nletters.size ).to_f / nbooks
dyn_min( nbooks ) { |s, rest| (rest || 0) + (s - mean) ** 2 }
end
private
# computes
# sum_{i\in range} @nletters[i]
def sum( range )
range.inject(0) {|s,i| @nletters[i] + s}
end
# Computes a solution minimizing the objective function.
def dyn_min( nbooks, &func )
# create initial books (1 book)
books = Array.new( @nletters.size )
s = 0
(@nletters.size - 1).downto(0) do |i|
s += @nletters[i]
range = i ... @nletters.size
sum = sum(range)
books[i] = [[Book.new( range, sum )], func.call(sum, nil)]
end
(nbooks - 1).times do
books = (0 ... @nletters.size).map { |s|
best_value = nil
best_end = nil
sum = @nletters[s] # value of the current part
for e in s.next ... @nletters.size
if books[e]
value = func.call(sum, books[e][1])
if best_value.nil? || value < best_value
best_value = value
best_end = e
end
end
sum += @nletters[e] || 0
end
if best_value
[[Book.new(s...best_end, sum(s...best_end))] +
books[best_end][0],
best_value]
else
nil
end
}
end
@books = books[0][0]
# collect the words into the books
@books.each do |book|
book.words = @words
end
end
end
# read word list
words = []
File.open( word_file, "r" ) do |f|
f.each_line do |line|
line.strip!
words << line unless line.empty?
end
end
list = Wordlist.new( words )
p list.letter_counts
puts "Number of words: #{list.letter_counts.values.inject(0){|s,x| s + x}}"
case algorithm
when 1
list.min_maximum( nbooks )
when 2
list.max_minimum( nbooks )
when 3
list.min_deviat( nbooks )
when 4
list.min_deviat2( nbooks )
else
raise "Unknown algorithm: #{algorithm} (should be in {1,2,3,4})"
end
pp list.books.mash{|book| [book.range, book.words]}
p list.books.mash{|book| [book.range, book.n_words]}
--