Matt Maycock
12/23/2004 7:04:00 PM
# ========== ext/input_reader.rb ==========
# ext/input_reader.rb
# accept some name argument. if name is nil || '-',
# then use $stdin and send that to the block
# otherwise, use File.open(name, *restargs)
require 'delegate'
# Yields a readable source to the given block, wrapped in a SimpleDelegator.
#
# fname  - path of the file to read; nil or '-' selects $stdin instead.
# fargs  - extra arguments forwarded unchanged to File.open (ignored for
#          $stdin).
#
# The wrapper is detached from the underlying source once the block is
# finished, so a reference that escapes the block cannot be used to reach
# the source afterwards. The detach happens in an `ensure`, so it also
# runs when the block raises.
#
# Returns the value of the block (as File.open does with a block).
def input_reader(fname, *fargs)
  serve = lambda do |source|
    getter = SimpleDelegator.new(source)
    begin
      yield getter
    ensure
      # Cut the link even on error so leaked wrappers are inert.
      getter.__setobj__(nil)
    end
  end

  if fname.nil? || fname == '-'
    serve.call($stdin)
  else
    # The block form closes the file for us.
    File.open(fname, *fargs) { |f| serve.call(f) }
  end
end
# ========== ext/input_reader.rb ==========
# ========== ext/text2lines.rb ==========
# ext/text2lines.rb
# Take input from a source one character at a time, and
# if we've struck a sequence of \n|\r gold, then
# print an argument-defined eoln marker instead.
#
# endls is the array of eoln marker characters.
# valid integers (string or actual) are valid, along
# with the string `newline' - which reduces to the
# system newline $/
#
# source is anything that responds to getc()
#
# sink is anything that responds to print(string)
#
require 'ext/input_reader'
# Copies characters from +source+, replacing end-of-line sequences with a
# caller-chosen marker.
#
# endls  - array of specs; each is converted with to_s, so both "13" and 13
#          are accepted. A decimal code contributes that character and
#          "newline" (any case) contributes $/. Without a prefix the spec is
#          appended to the output marker; with a leading '-' it is added to
#          the set of characters treated as separators. Specs matching
#          neither form are ignored. With no marker specs the marker is $/;
#          with no separator specs the separators are "\n" and "\r".
# source - anything responding to getc; reading stops when getc returns
#          nil/false.
# sink   - anything responding to print(string); only used when no block is
#          given.
#
# With a block, nothing is printed: the block is called once per line with
# (line_text, marker).
#
# A run of distinct separator characters (e.g. "\r\n") counts as one line
# end; a repeated separator character starts a new one. A marker is always
# emitted at end of input, including for empty input.
#
# The return value is incidental and should not be relied on.
def text2lines(endls, source, sink=nil)
  separators = []
  marker_parts = []
  endls.each do |spec|
    text = spec.to_s # accept real Integers as well as numeric strings
    # \A/\z instead of ^/$ so a multi-line string cannot sneak through on
    # the strength of one of its lines.
    case text.downcase
    when /\A(-)?newline\z/
      (Regexp.last_match(1).nil? ? marker_parts : separators) << $/
    when /\A(-)?\d+\z/
      (Regexp.last_match(1).nil? ? marker_parts : separators) << text.to_i.abs.chr
    end
  end

  marker = marker_parts.empty? ? $/ : marker_parts.join('')
  separators = [10.chr, 13.chr] if separators.empty?

  use_block = block_given?
  line = nil                # text collected for the current line (block mode)
  pending = Hash.new(0)     # separator characters seen since the last marker

  emit_char = lambda do |ch|
    if use_block
      (line ||= ''.dup) << ch
    else
      sink.print ch
    end
  end

  flush = lambda do
    if use_block
      yield(line || ''.dup, marker)
      line = nil
    else
      sink.print marker
    end
    pending = Hash.new(0)
  end

  while (char = source.getc)
    ch = char.chr # chr is applied whether getc gave an Integer or a String
    if separators.include?(ch)
      # Same separator twice means the previous line end is complete.
      flush.call if pending[ch] > 0
      pending[ch] += 1
    else
      # First ordinary character after a separator run closes that run.
      flush.call unless pending.empty?
      emit_char.call(ch)
    end
  end
  # Emits the marker for a trailing separator run or an unterminated line.
  flush.call
end
# Reads +file+ through input_reader and returns its lines as an array of
# strings, split by text2lines.
#
# file - passed to input_reader as the name of the input.
# args - end-of-line specs handed to text2lines after to_s conversion. The
#        special argument '-c' is removed from the list first; when it is
#        present the line terminator is not appended to each line.
def read_text2lines(file, *args)
  collected = []
  chomper = args.delete('-c') { false }

  input_reader(file) do |source|
    specs = args.map { |spec| spec.to_s }
    text2lines(specs, source) do |body, terminator|
      body << terminator unless chomper || terminator.nil?
      collected << body
    end
  end

  collected
end
# ========== ext/text2lines.rb ==========
# ========== ~/local/bin/text2lines ==========
#!/usr/bin/env ruby
require 'ext/text2lines'
# Writes the command-line help text to +out+ and terminates the process.
#
# out - any object responding to puts; defaults to $stdout.
#
# Does not return: it always ends with exit(-1).
def usage(out=$stdout)
  help_text = <<-END_USAGE
Usage: #{$0} [newline | ascii-code]+
Replaces all instances of \\r and \\n with new end of line
markers. \\r\\n and \\n\\r are treated as one unit. \\r\\r and
\\n\\n are treated as two.
The new markers are formed from command-line arguments. If
no arguments are given, then the system's end of line marker
is used. Otherwise, the sequence of ascii-codes / newlines
are used, with newline representing the system's end of line
marker. Characters are read from stdin.
EXAMPLES:
#{$0} 13 10
replaces all `standard' end of line markers with \\r\\n.
#{$0} newline
replaces all `standard' end of line markers with the system
end of line marker.
  END_USAGE
  out.puts(help_text)
  exit(-1)
end
# Validate the command-line arguments: each must be the word `newline' or a
# decimal character code. A help flag prints the usage text; anything else
# is reported on $stderr followed by the usage text. Both paths exit
# inside usage.
args = ARGV.map do |arg|
  case arg
  when /^--+h(e(l(p)?)?)?$/i then usage
  when /^newline$/ then arg
  when /^\d+$/ then arg
  else
    $stderr.puts "Error - bad argument #{arg}"
    # Must be a call with parentheses: `usage[$stderr]` parses as
    # `usage()[$stderr]`, which prints the help to $stdout and exits before
    # the index is ever applied.
    usage($stderr)
  end
end

# Filter stdin to stdout using the validated markers.
text2lines(args, $stdin, $stdout)
# ========== ~/local/bin/text2lines ==========
[ummaycoc@localhost ummaycoc]$ echo 'hello
my
ruby
loving
friends' | text2lines 65
helloAmyArubyAlovingAfriendsA[ummaycoc@localhost ummaycoc]$
[ummaycoc@localhost ummaycoc]$ echo 'hello
my
ruby
loving
friends' | text2lines 13 > rubytmp
[ummaycoc@localhost ummaycoc]$ more rubytmp
friends
[ummaycoc@localhost ummaycoc]$
so, obviously, if this doesn't work for you - getc will :-)
--
There's no word in the English language for what you do to a dead
thing to make it stop chasing you.