[lnkForumImage]
TotalShareware - Download Free Software

Confronta i prezzi di migliaia di prodotti.
Asp Forum
 Home | Login | Register | Search 


 

Forums >

comp.lang.ruby

PATCH to make internal Hash class retain order...

Thies C. Arntzen

8/12/2006 4:51:00 PM

Hi there -

beeing new to ruby (thru rails) and coming from a strong php background

i really miss beeing able to retain order in a Hash. i know that this
has been discussed in the past i also understand that i can always
write my own OrderedHash implemenation in user-space. -but- as a) i
love to poke around b) userspace was too slow for some of my problems
-and- c) it's always best to to let "code speak" i decided to "scratch
my itch" and wrote a patch (against 1.8.4) that will:

* remember the order in which elements have been added to a Hash (at
the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
per hash-table)
* use this order for iterating (st_foreach will use this order)
* implement a call Hash.order! allows the user to change the internal
order;-)

y = { 'd' => '4', 'a' => '1', 'b' => '2', 'c' => '3' }
puts y.inspect

y.order!
puts y.inspect

y.order! {|a,b| b[1] <=> a[1]}
puts y.inspect

will now get you:
{"d"=>"4", "a"=>"1", "b"=>"2", "c"=>"3"}
{"a"=>"1", "b"=>"2", "c"=>"3", "d"=>"4"}
{"d"=>"4", "c"=>"3", "b"=>"2", "a"=>"1"}


please bear with me as i'm not fully used to all the conventions you
use in the code - also this has only seen minimal testing...

suggestions welcome!

regards, tc

--- snipp --

diff -ru ruby-1.8.4/hash.c ruby-thies/hash.c
--- ruby-1.8.4/hash.c 2005-07-19 10:25:37.000000000 +0200
+++ ruby-thies/hash.c 2006-08-12 17:28:56.000000000 +0200
@@ -1195,6 +1195,46 @@
return entries;
}

+/*
+ * call-seq:
+ * hsh.order! => hash
+ * hsh.order! {|a,b| block } => hash
+ *
+ * Orders <i>hsh</i> according to code <code>[</code> <i>key,
+ * value</i> <code>]</code>. If no block is given the sort order
+ * defaults to the keyorder.
+ *
+ * h = { "b" => 30, "c" => 10, "a" => 20 }
+ * h.order! #=> { "c" => 10, "a" => 20, "b"
=> 30 }
+ * h.order! {|a,b| a[0]<=>b[0]} #=> { "a" => 20, "b" => 30, "c"
=> 10 }
+ *
+ */
+
+static VALUE
+rb_hash_order(hash)
+ VALUE hash;
+{
+ VALUE *keyorder;
+ VALUE entries = rb_hash_to_a(hash);
+ int i;
+
+ rb_ary_sort_bang(entries);
+
+ keyorder = ALLOC_N(VALUE, RARRAY(entries)->len);
+ if (! keyorder) {
+ return 0;
+ }
+
+ for (i = 0; i < RARRAY(entries)->len; i++) {
+ keyorder[i] = RARRAY(RARRAY(entries)->ptr[i])->ptr[0];
+ }
+
+ st_reorder(RHASH(hash)->tbl, keyorder);
+ free(keyorder);
+
+ return hash;
+}
+
static int
inspect_i(key, value, str)
VALUE key, value, str;
@@ -2449,6 +2489,7 @@
rb_define_method(rb_cHash,"each_key", rb_hash_each_key, 0);
rb_define_method(rb_cHash,"each_pair", rb_hash_each_pair, 0);
rb_define_method(rb_cHash,"sort", rb_hash_sort, 0);
+ rb_define_method(rb_cHash,"order!", rb_hash_order, 0);

rb_define_method(rb_cHash,"keys", rb_hash_keys, 0);
rb_define_method(rb_cHash,"values", rb_hash_values, 0);
diff -ru ruby-1.8.4/rubytest.rb ruby-thies/rubytest.rb
--- ruby-1.8.4/rubytest.rb 2005-02-06 18:03:32.000000000 +0100
+++ ruby-thies/rubytest.rb 2006-08-12 15:04:15.000000000 +0200
@@ -42,6 +42,7 @@
print "test succeeded\n"
exit 0
end
+ puts line
error << line if line =~ %r:^(sample/test.rb|not):
end
print error
Only in ruby-thies: signal.o
Only in ruby-thies: sprintf.o
diff -ru ruby-1.8.4/st.c ruby-thies/st.c
--- ruby-1.8.4/st.c 2005-12-20 05:13:21.000000000 +0100
+++ ruby-thies/st.c 2006-08-12 18:05:55.000000000 +0200
@@ -13,11 +13,25 @@

typedef struct st_table_entry st_table_entry;

+#define ST_APPEND_TO_GLOBAL_ORDER(tbl, entry)+ (entry)->gnext = NULL;+ (entry)->gprev = (tbl)->tail;+ if ((entry)->gprev) {+ (entry)->gprev->gnext = (entry);+ }+ (tbl)->tail = (entry);+ if (! (tbl)->head) {+ (tbl)->head = (entry);+ }
+
+
struct st_table_entry {
unsigned int hash;
st_data_t key;
st_data_t record;
st_table_entry *next;
+ st_table_entry *gprev;
+ st_table_entry *gnext;
};

#define ST_DEFAULT_MAX_DENSITY 5
@@ -135,6 +149,52 @@
}
#endif

+static int
+st_unlink_entry(table, entry)
+ st_table *table;
+ st_table_entry *entry;
+{
+ int i;
+ st_table_entry *last, *ptr;
+
+ for (i = 0; i < table->num_bins; i++) {
+ last = NULL;
+ ptr = table->bins[i];
+ while (ptr) {
+ if (ptr == entry) break; /* found */
+ last = ptr;
+ ptr = ptr->next;
+ }
+ if (! ptr) continue; /* correct bin it yet to be found */
+
+ if (last) {
+ last->next = ptr->next;
+ } else {
+ table->bins[i] = ptr->next;
+ }
+
+ if (table->head == ptr) {
+ table->head = ptr->gnext;
+ }
+ if (table->tail == ptr) {
+ table->tail = ptr->gprev;
+ }
+
+ if (ptr->gnext) {
+ ptr->gnext->gprev = ptr->gprev;
+ }
+
+ if (ptr->gprev) {
+ ptr->gprev->gnext = ptr->gnext;
+ }
+
+ table->num_entries--;
+ return 1;
+ }
+ return 0;
+}
+
+
st_table*
st_init_table_with_size(type, size)
struct st_hash_type *type;
@@ -157,6 +217,8 @@
tbl->num_bins = size;
tbl->bins = (st_table_entry **)Calloc(size,
sizeof(st_table_entry*));

+ tbl->head = tbl->tail = NULL;
+
return tbl;
}

@@ -269,6 +331,7 @@
entry->record = value; entry->next = table->bins[bin_pos]; table->bins[bin_pos] = entry;+ ST_APPEND_TO_GLOBAL_ORDER(table, entry) table->num_entries++; } while (0)

@@ -339,39 +402,74 @@
{
st_table *new_table;
st_table_entry *ptr, *entry;
- int i, num_bins = old_table->num_bins;
+ int num_bins = old_table->num_bins;
+ unsigned int bin_pos;

- new_table = alloc(st_table);
+ new_table = st_init_table_with_size(old_table->type,
old_table->num_bins);
if (new_table == 0) {
return 0;
}

- *new_table = *old_table;
- new_table->bins = (st_table_entry**)
- Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+ ptr = old_table->head;
+ while (ptr) {
+ entry = alloc(st_table_entry);
+ if (! entry) {
+ st_free_table(new_table); /* safe as new_table is consistent */
+ return 0;
+ }
+ *entry = *ptr;
+ bin_pos = entry->hash % num_bins;
+ entry->next = new_table->bins[bin_pos];
+ new_table->bins[bin_pos] = entry;
+ ST_APPEND_TO_GLOBAL_ORDER(new_table, entry);
+ new_table->num_entries++;

- if (new_table->bins == 0) {
- free(new_table);
- return 0;
+ ptr = ptr->gnext;
}

- for(i = 0; i < num_bins; i++) {
- new_table->bins[i] = 0;
- ptr = old_table->bins[i];
- while (ptr != 0) {
- entry = alloc(st_table_entry);
- if (entry == 0) {
- free(new_table->bins);
- free(new_table);
- return 0;
- }
- *entry = *ptr;
- entry->next = new_table->bins[i];
- new_table->bins[i] = entry;
- ptr = ptr->next;
+ return new_table;
+}
+
+void
+st_reorder(table, keylist)
+ st_table *table;
+ st_data_t *keylist;
+{
+ st_table buf_table, *org_table;
+ st_table_entry *entry;
+ st_data_t key;
+ unsigned int i, hash_val, bin_pos;
+
+ /*
+ * "algorithmus":
+ * 1 remeber old table content
+ * 2 reinitialize old_table to be empty
+ * 3 refill old table in corrent order (supplied by keylist) using

the old content
+ */
+
+ buf_table = *table;
+ org_table = &buf_table;
+
+ table->num_entries = 0;
+ table->bins = (st_table_entry **)Calloc(table->num_bins,
sizeof(st_table_entry*));
+ table->head = table->tail = NULL;
+
+ for (i = 0; i < org_table->num_entries; i++) {
+ key = keylist[i];
+ hash_val = do_hash(key, org_table);
+ FIND_ENTRY(org_table, entry, hash_val, bin_pos);
+ if (! entry) {
+ /* should not happen */
+ printf("uoooo...\n");
}
+
+ entry->next = table->bins[bin_pos];
+ table->bins[bin_pos] = entry;
+ ST_APPEND_TO_GLOBAL_ORDER(table, entry);
+ table->num_entries++;
}
- return new_table;
+
+ free(org_table->bins);
}

int
@@ -381,39 +479,26 @@
st_data_t *value;
{
unsigned int hash_val;
- st_table_entry *tmp;
register st_table_entry *ptr;

hash_val = do_hash_bin(*key, table);
ptr = table->bins[hash_val];

- if (ptr == 0) {
- if (value != 0) *value = 0;
- return 0;
+ while (ptr) {
+ if (EQUAL(table, ptr->key, *key)) break;
+ ptr = ptr->next;
}

- if (EQUAL(table, *key, ptr->key)) {
- table->bins[hash_val] = ptr->next;
- table->num_entries--;
+ if (ptr) {
+ st_unlink_entry(table, ptr);
if (value != 0) *value = ptr->record;
*key = ptr->key;
free(ptr);
return 1;
+ } else {
+ if (value != 0) *value = 0;
+ return 0;
}
-
- for(; ptr->next != 0; ptr = ptr->next) {
- if (EQUAL(table, ptr->next->key, *key)) {
- tmp = ptr->next;
- ptr->next = ptr->next->next;
- table->num_entries--;
- if (value != 0) *value = tmp->record;
- *key = tmp->key;
- free(tmp);
- return 1;
- }
- }
-
- return 0;
}

int
@@ -429,22 +514,21 @@
hash_val = do_hash_bin(*key, table);
ptr = table->bins[hash_val];

- if (ptr == 0) {
- if (value != 0) *value = 0;
- return 0;
+ while (ptr) {
+ if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) break;
+ ptr = ptr->next;
}

- for(; ptr != 0; ptr = ptr->next) {
- if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
- table->num_entries--;
- *key = ptr->key;
- if (value != 0) *value = ptr->record;
- ptr->key = ptr->record = never;
- return 1;
- }
+ if (ptr) {
+ table->num_entries--;
+ if (value != 0) *value = ptr->record;
+ *key = ptr->key;
+ free(ptr);
+ return 1;
+ } else {
+ if (value != 0) *value = 0;
+ return 0;
}
-
- return 0;
}

static int
@@ -472,46 +556,40 @@
int (*func)();
st_data_t arg;
{
- st_table_entry *ptr, *last, *tmp;
+ st_table_entry *ptr, *tmp;
enum st_retval retval;
- int i;
-
- for(i = 0; i < table->num_bins; i++) {
- last = 0;
- for(ptr = table->bins[i]; ptr != 0;) {
- retval = (*func)(ptr->key, ptr->record, arg);
- switch (retval) {
- case ST_CHECK: /* check if hash is modified during iteration */
- tmp = 0;
- if (i < table->num_bins) {
- for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
- if (tmp == ptr) break;
- }
- }
- if (!tmp) {
- /* call func with error notice */
- return 1;
- }
- /* fall through */
- case ST_CONTINUE:
- last = ptr;
- ptr = ptr->next;
- break;
- case ST_STOP:
- return 0;
- case ST_DELETE:
- tmp = ptr;
- if (last == 0) {
- table->bins[i] = ptr->next;
- }
- else {
- last->next = ptr->next;
- }
- ptr = ptr->next;
- free(tmp);
- table->num_entries--;
- }
- }
+
+ ptr = table->head;
+ while (ptr) {
+ retval = (*func)(ptr->key, ptr->record, arg);
+ switch (retval) {
+ case ST_CHECK: /* check if hash is modified during iteration
*/
+ tmp = table->head;
+ while (tmp) {
+ if (tmp == ptr) break; /* current element is still
there - good */
+ tmp = tmp->gnext;
+ }
+ if (! tmp) {
+ /* call func with error notice */
+ return 1;
+ }
+ /* fall through */
+ case ST_CONTINUE:
+ ptr = ptr->gnext;
+ break;
+ case ST_STOP:
+ return 0;
+ case ST_DELETE:
+ tmp = ptr;
+ ptr = ptr->gnext;
+ st_unlink_entry(table, tmp);
+ free(tmp);
+ break;
+ default:
+ /* should never happen! */
+ printf("uhh? retval=%d\n", retval);
+ exit(-10);
+ }
}
return 0;
}
diff -ru ruby-1.8.4/st.h ruby-thies/st.h
--- ruby-1.8.4/st.h 2005-02-08 01:50:33.000000000 +0100
+++ ruby-thies/st.h 2006-08-12 17:51:50.000000000 +0200
@@ -21,6 +21,8 @@
int num_bins;
int num_entries;
struct st_table_entry **bins;
+ struct st_table_entry *head;
+ struct st_table_entry *tail;
};

#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
@@ -53,6 +55,7 @@
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));
+void st_reorder _((st_table *, st_data_t *));

#define ST_NUMCMP ((int (*)()) 0)
#define ST_NUMHASH ((int (*)()) -2)

20 Answers

ts

8/12/2006 5:07:00 PM

0

>>>>> "T" == Thies C Arntzen <thieso@gmail.com> writes:

T> i really miss beeing able to retain order in a Hash. i know that this
T> has been discussed in the past i also understand that i can always
T> write my own OrderedHash implemenation in user-space. -but- as a) i
T> love to poke around b) userspace was too slow for some of my problems
T> -and- c) it's always best to to let "code speak" i decided to "scratch
T> my itch" and wrote a patch (against 1.8.4) that will:

Why you don't first write a C extension, rather than modifying ruby ?


--

Guy Decoux

Brian McCallister

8/12/2006 5:53:00 PM

0


On Aug 12, 2006, at 9:55 AM, Thies C. Arntzen wrote:

> Hi there -
>
> beeing new to ruby (thru rails) and coming from a strong php
> background

<snip />

> * remember the order in which elements have been added to a Hash (at
> the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
> per hash-table)
> * use this order for iterating (st_foreach will use this order)
> * implement a call Hash.order! allows the user to change the internal
> order;-)

Nice :-) The better place for discussion hacking on the ruby
interpreter itself is probably ruby-core.

How difficult would it be to do an OrderedHash data structure as a C
extension instead of in Ruby?

-Brian




Thies C. Arntzen

8/12/2006 6:32:00 PM

0

Brian McCallister schrieb:

> On Aug 12, 2006, at 9:55 AM, Thies C. Arntzen wrote:
>
> > Hi there -
> >
> > beeing new to ruby (thru rails) and coming from a strong php
> > background
>
> <snip />
>
> > * remember the order in which elements have been added to a Hash (at
> > the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
> > per hash-table)
> > * use this order for iterating (st_foreach will use this order)
> > * implement a call Hash.order! allows the user to change the internal
> > order;-)
>
> Nice :-) The better place for discussion hacking on the ruby
> interpreter itself is probably ruby-core.

i wasn't aware of that list;-) subscribed...

>
> How difficult would it be to do an OrderedHash data structure as a C
> extension instead of in Ruby?

i really believe that it needs to be a 1st class citizen - the reason
it's done in C is because userland imposes too much overhead (i
believe) and converting forward<->back to the normal Hash would be
slow, memory intensive and ordering information would get lost on the
way...

also the amount of code added to the Hash class is ~20 lines of C-code
- this code adds zero overhead. the bigger chunk of the code was added
to the st.c which is the underlying library for handling hash-tables -
and duplicating > 600 lines of code to add ~60 lines didn't seem very
cool to me;-)

best, thies

dblack

8/12/2006 6:46:00 PM

0

Robert Klemme

8/12/2006 10:20:00 PM

0

Thies C. Arntzen wrote:
> Brian McCallister schrieb:
>
>> On Aug 12, 2006, at 9:55 AM, Thies C. Arntzen wrote:
>>
>>> Hi there -
>>>
>>> beeing new to ruby (thru rails) and coming from a strong php
>>> background
>> <snip />
>>
>>> * remember the order in which elements have been added to a Hash (at
>>> the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
>>> per hash-table)
>>> * use this order for iterating (st_foreach will use this order)
>>> * implement a call Hash.order! allows the user to change the internal
>>> order;-)
>> Nice :-) The better place for discussion hacking on the ruby
>> interpreter itself is probably ruby-core.
>
> i wasn't aware of that list;-) subscribed...
>
>> How difficult would it be to do an OrderedHash data structure as a C
>> extension instead of in Ruby?
>
> i really believe that it needs to be a 1st class citizen, - the reason
> it's done in C is because userland imposes too much overhead (i
> believe) and converting forward<->back to the normal Hash would be
> slow, memory intensive and ordering information would get lost on the
> way...
>
> also the amount of code added to the Hash class is ~20 lines of C-code
> - this code adds zero overhead. the bigger chunk of the code was added
> to the st.c which is the underlying library for handling hash-tables -
> and duplicating > 600 lines of code to add ~60 lines didn't seem very
> cool to me;-)

Adding to David's comments: you're changing semantics of one of the core
classes which is in itself a dangerous thing. Plus, you increase the
memory footprint of a hash which is IMHO a very bad idea for such a
widely used data structure (and some of these hashes grow huge). So, if
you want to patch ruby core, make it at least a subclass of Hash so
people can choose.

Then of course there's a whole bunch of other things that one should
consider. Is the name properly chosen? Do we have to provide a means
for the user to *define* the standard order on creation? This is e.g.
the way Java's TreeSet works - and IMHO it's superior vs. having to call
order! manually. etc.

Kind regards

robert

Trans

8/12/2006 11:01:00 PM

0


Robert Klemme wrote:

> Adding to David's comments: you're changing semantics of one of the core
> classes which is in itself a dangerous thing. Plus, you increase the
> memory footprint of a hash which is IMHO a very bad idea for such a
> widely used data structure (and some of these hashes grow huge). So, if
> you want to patch ruby core, make it at least a subclass of Hash so
> people can choose.
>
> Then of course there's a whole bunch of other things that one should
> consider. Is the name properly chosen? Do we have to provide a means
> for the user to *define* the standard order on creation? This is e.g.
> the way Java's TreeSet works - and IMHO it's superior vs. having to call
> order! manually. etc.

All good points. I still think it would be nice if such a class were
built-in to Ruby with literal:

[ :a => 1, :b => 2 ]

Maybe as a red-black tree class.

T.

Hal E. Fulton

8/12/2006 11:44:00 PM

0

ts wrote:
>>>>>>"T" == Thies C Arntzen <thieso@gmail.com> writes:
>
>
> T> i really miss beeing able to retain order in a Hash. i know that this
> T> has been discussed in the past i also understand that i can always
> T> write my own OrderedHash implemenation in user-space. -but- as a) i
> T> love to poke around b) userspace was too slow for some of my problems
> T> -and- c) it's always best to to let "code speak" i decided to "scratch
> T> my itch" and wrote a patch (against 1.8.4) that will:
>
> Why you don't first write a C extension, rather than modifying ruby ?
>

Interesting question... do you mean a C extension implementing a
different Hash class or what?


Hal


Hal E. Fulton

8/12/2006 11:46:00 PM

0

Brian McCallister wrote:
>
> On Aug 12, 2006, at 9:55 AM, Thies C. Arntzen wrote:
>
>> Hi there -
>>
>> beeing new to ruby (thru rails) and coming from a strong php background
>
>
> <snip />
>
>> * remember the order in which elements have been added to a Hash (at
>> the cost of 2 extra pointers in every hash-bucket + 2 extra pointers
>> per hash-table)
>> * use this order for iterating (st_foreach will use this order)
>> * implement a call Hash.order! allows the user to change the internal
>> order;-)
>
>
> Nice :-) The better place for discussion hacking on the ruby
> interpreter itself is probably ruby-core.
>
> How difficult would it be to do an OrderedHash data structure as a C
> extension instead of in Ruby?

Well, if it were another class, then you would lose the order
information before you ever got into that class, right?

hash = {a=>1, b=>2, c=>3}

ordered = OrderedHash.new(hash) # Oops -- too late


Hal

Hal E. Fulton

8/12/2006 11:48:00 PM

0

dblack@wobblini.net wrote:
>
> The problem you likely face, though, is that no one will use it (which
> may or may not matter to you :-) You're requiring that people use a
> patched Ruby, and if anyone writes an application that uses it, then
> they require that everyone who runs the application use a patched
> Ruby, and so on.
>
> It's much more likely -- probably literally thousands of times more
> likely -- that people will try out something distributed as an
> extension than that they'll patch the Ruby source. And C extensions
> are very fast, once loaded.

My impression was that this was for play and for testing
prior to possibly being folded into Ruby.

Also, I don't yet see how this could be implemented as a
C extension. See my previous email.


Hal

dblack

8/12/2006 11:52:00 PM

0