When using expanded classes is not always the best performance option

by Finnian Reilly (modified: 2015 Nov 05)

Expanded class V 64 bit integer

While working on my current project I discovered an interesting fact that for certain kinds of data, expanded classes don't provide the best performance. My project involves processing a lot of sub-strings and requires a representation of a sequential list of integer intervals. The obvious choice is to use an instance of ARRAYED_LIST [INTEGER_INTERVAL], but as performance is critical this would not be fast enough for this project. So instead I devised a class to represent integer intervals as 64 bit integers and use "bit twiddling" to get and set the upper and lower bounds. The design was intended to minimise garbage collection and array indexing.

class EL_SEQUENTIAL_INTERVALS inherit ARRAYED_LIST [INTEGER_64]

Although I was satisfied with it's performance, it occurred to me that I might be able to simplify the code without sacrificing anything by using an expanded class to represent the integer interval as follows:

expanded class EL_INTEGER_INTERVAL feature -- Access lower: INTEGER upper: INTEGER count: INTEGER do Result := upper - lower + 1 end end

My expectation was that this would give similar performance to the ARRAYED_LIST [INTEGER_64] implementation, but I was completely wrong as the following benchmark shows. The first test takes almost double the time using the expanded class implementation.

Benchmark INTEGER_64 implementation

class: ZSTRING_BENCHMARK concatenate_chinese_characters: 0.240 millsecs. Storage: 672 list_pinyin_names: 0.600 millsecs. Storage: 9392 list_chinese_character_names: 0.680 millsecs. Storage: 11264 list_english_descriptions: 0.870 millsecs. Storage: 11920 list_fully_qualified_names: 1.350 millsecs. Storage: 15936

Benchmark expanded class implementation

class: ZSTRING_BENCHMARK concatenate_chinese_characters: 0.470 millsecs. Storage: 672 list_pinyin_names: 0.680 millsecs. Storage: 9392 list_chinese_character_names: 0.860 millsecs. Storage: 11264 list_english_descriptions: 0.760 millsecs. Storage: 11920 list_fully_qualified_names: 1.420 millsecs. Storage: 15936

Each test is the average time of 100 repetitions in the finalised executable compiled with GCC. Inlining_size is set to 2. A full garbage collect is forced during each test iteration

64 bit integer source listing

class EL_SEQUENTIAL_INTERVALS inherit ARRAYED_LIST [INTEGER_64] rename lower as lower_index, upper as upper_index, count as item_count, extend as interval_extend, replace as replace_item redefine out end create make feature -- Access count: INTEGER local l_item: like item do l_item := item Result := upper_integer (l_item) - lower_integer (l_item) + 1 end count_sum: INTEGER local l_area: like area; i, l_count: INTEGER; l_item: like item do l_area := area; l_count := l_area.count from until i = l_count loop l_item := l_area [i] Result := Result + upper_integer (l_item) - lower_integer (l_item) + 1 i := i + 1 end end last_count: INTEGER local l_last: like last do l_last := last Result := upper_integer (l_last) - lower_integer (l_last) + 1 end last_lower: INTEGER do Result := lower_integer (last) end last_upper: INTEGER do Result := upper_integer (last) end lower: INTEGER do Result := lower_integer (item) end out: STRING local l_area: like area; i, l_count: INTEGER; l_item: like item do create Result.make (8 * item_count) l_area := area; l_count := l_area.count from until i = l_count loop l_item := l_area [i] if not Result.is_empty then Result.append (", ") end Result.append_character ('[') Result.append_integer (lower_integer (l_item)) Result.append_character (':') Result.append_integer (upper_integer (l_item)) Result.append_character (']') i := i + 1 end end upper: INTEGER do Result := upper_integer (item) end feature -- Status query item_has (n: INTEGER): BOOLEAN local l_item: like item do l_item := item Result := lower_integer (l_item) <= n and then n <= upper_integer (l_item) end feature -- Element change extend (a_lower, a_upper: INTEGER) require interval_after_last: not is_empty implies a_lower > last_upper do interval_extend (a_lower.to_integer_64 |<< 32 | a_upper) end extend_upper (a_upper: INTEGER) local l_last: like last do if is_empty then extend (a_upper, a_upper) else l_last := last if upper_integer (l_last) + 1 = a_upper then finish; replace_item (l_last + 1) else extend (a_upper, a_upper) end end end cut_before (n: INTEGER) local found: BOOLEAN do from start until found or after loop if n > upper then remove elseif item_has (n) then replace (n, upper) found := True else forth end end end cut_after (n: INTEGER) local found: BOOLEAN do from finish until found or before loop if n < lower then remove; back elseif item_has (n) then replace (lower, n) found := True else back end end end replace (a_lower, a_upper: INTEGER) do replace_item (a_lower.to_integer_64 |<< 32 | a_upper) end feature {NONE} -- Implementation lower_integer (a_item: like item): INTEGER do Result := (a_item |>> 32).to_integer_32 end upper_integer (a_item: like item): INTEGER do Result := (a_item & 0xFFFFFFFF).to_integer_32 end end

Expanded class source listing

class EL_SEQUENTIAL_INTERVALS inherit ARRAYED_LIST [EL_INTEGER_INTERVAL] rename extend as interval_extend, replace as replace_item redefine out end create make feature -- Access count_sum: INTEGER local l_area: like area; i, l_count: INTEGER; l_item: like item do l_area := area; l_count := l_area.count from until i = l_count loop l_item := l_area [i] Result := Result + l_item.upper - l_item.lower + 1 i := i + 1 end end last_count: INTEGER local l_last: like last do l_last := last Result := l_last.upper - l_last.lower + 1 end out: STRING local l_area: like area; i, l_count: INTEGER; l_item: like item do create Result.make (8 * count) l_area := area; l_count := l_area.count from until i = l_count loop l_item := l_area [i] if not Result.is_empty then Result.append (", ") end Result.append_character ('[') Result.append_integer (l_item.lower) Result.append_character (':') Result.append_integer (l_item.upper) Result.append_character (']') i := i + 1 end end feature -- Status query item_has (n: INTEGER): BOOLEAN local l_item: like item do l_item := item Result := l_item.lower <= n and then n <= l_item.upper end feature -- Element change extend (a_lower, a_upper: INTEGER) require interval_after_last: not is_empty implies a_lower > last.upper local l_item: EL_INTEGER_INTERVAL do l_item.set_lower (a_lower); l_item.set_upper (a_upper) interval_extend (l_item) end extend_upper (a_upper: INTEGER) local l_last: like last do if is_empty then extend (a_upper, a_upper) else l_last := last if l_last.upper + 1 = a_upper then l_last.set_upper (a_upper) finish; replace_item (l_last) else extend (a_upper, a_upper) end end end cut_before (n: INTEGER) local found: BOOLEAN do from start until found or after loop if n > item.upper then remove elseif item_has (n) then replace (n, item.upper) found := True else forth end end end cut_after (n: INTEGER) local found: BOOLEAN do from finish until found or before loop if n < item.lower then remove; back elseif item_has (n) then replace (item.lower, n) found := True else back end end end replace (a_lower, a_upper: INTEGER) local l_item: EL_INTEGER_INTERVAL do l_item.set_lower (a_lower); l_item.set_upper (a_upper) replace_item (l_item) end end

Comments
  • Bernd Schoeller (8 years ago 6/11/2015)

    This is interesting - can you also tell us what the benchmarks do, and what the numbers mean?

    Also, did you try to store it as just two play ARRAY[INTEGER] ? Just as I prefer to avoid bit-shifting.

    Last but not least, one might inspect the C code to understand where the speed differences come from - did you have a look?

    • Finnian Reilly (8 years ago 6/11/2015)

      Comparison of C output

      Briefly the benchmarks compare the performance and memory efficiency of STRING_32 and a new string type: alias ZSTRING. ZSTRING uses a hybrid of latin and unicode encodings to give improved memory efficiency and performance in non-Asian language environments. Think of it as a compressed STRING_32.

      The EL_SEQUENTIAL_INTERVALS class is used as an intermediate singleton buffer to record the encoding overflows, viz character substrings which could not be encoded with a latin character set. The buffer is then used to allocate a SPECIAL [NATURAL] array within ZSTRING to store any unencodeable characters.

      class EL_ZCODEC feature -- Basic operations encode ( unicode_in: READABLE_STRING_GENERAL; latin_chars_out: SPECIAL [CHARACTER]; out_offset: INTEGER; unencoded_intervals: EL_SEQUENTIAL_INTERVALS ) -- encode unicode characters as latin -- Set unencodeable characters as the Substitute character (26) and record location in unencoded_intervals require latin_chars_out_big_enough: latin_chars_out.count >= unicode_in.count valid_offset_and_count: unicode_in.count > 0 implies latin_chars_out.valid_index (unicode_in.count + out_offset - 1) local i, count, unicode: INTEGER; uc: CHARACTER_32; c: CHARACTER; l_unicodes: like unicode_table; do l_unicodes := unicode_table; count := unicode_in.count from i := 1 until i > count loop uc := unicode_in [i]; unicode := uc.code if unicode <= 255 and then l_unicodes [unicode] = uc then latin_chars_out [i + out_offset - 1] := uc.to_character_8 else c := latin_character (uc, unicode) if c.code = 0 then latin_chars_out [i + out_offset - 1] := unencoded_character unencoded_intervals.extend_upper (i + out_offset) else latin_chars_out [i + out_offset - 1] := c end end i := i + 1 end end

      As the unencoded_intervals argument is a singleton which is constantly being emptied and refilled, both INTEGER_INTERVAL and ARRAY [INTEGER] will cause a lot of unnecessary garbage collection. ARRAY doubly so as each one comprises two objects, Current and area. So no, I haven't tried it.

      The test labeled "concatenate chinese characters" will cause the most calls to unencoded_intervals, as non of the characters can be encoded with a latin character set. The expanded class version takes twice as long as the INTEGER_64 version

      If you want to compare C output, {EL_SEQUENTIAL_INTERVALS}.item_has might offer a clue to the performance difference.

      The Eiffel code class EL_SEQUENTIAL_INTERVALS inherit ARRAYED_LIST [EL_INTEGER_INTERVAL] feature -- Status query item_has (n: INTEGER): BOOLEAN local l_item: like item do l_item := item Result := l_item.lower <= n and then n <= l_item.upper end correlates with the C code /* {EL_SEQUENTIAL_INTERVALS}.item_has */ EIF_BOOLEAN F1219_15488 (EIF_REFERENCE Current, EIF_INTEGER_32 arg1) { GTCX RTEX; struct eif_ex_1200 sloc1; EIF_REFERENCE loc1 = (EIF_REFERENCE) sloc1.data; EIF_REFERENCE tr1 = NULL; EIF_INTEGER_32 ti4_1; EIF_BOOLEAN Result = ((EIF_BOOLEAN) 0); RTLD; memset (&sloc1.overhead, 0, OVERHEAD + _OBJSIZ_0_0_0_2_0_0_0_0_); sloc1.overhead.ov_flags = EO_EXP | EO_STACK; RT_DFS(&sloc1.overhead, eif_final_id(Y2440,Y2440_gen_type,Dftype(Current),464)); RTLI(3); RTLR(0,loc1); RTLR(1,Current); RTLR(2,tr1); RTEAA("item_has", 1218, Current, 1, 1, 21475); RTGC; RTHOOK(1); tr1 = F1218_5561(Current); tr1 = RTRCL(tr1); RTXA(tr1, loc1); RTHOOK(2); Result = '\0'; ti4_1 = *(EIF_INTEGER_32 *)(RTCV(loc1)+ _LNGOFF_0_0_0_0_); if ((EIF_BOOLEAN) (ti4_1 <= arg1)) { ti4_1 = *(EIF_INTEGER_32 *)(RTCV(loc1)+ _LNGOFF_0_0_0_1_); Result = (EIF_BOOLEAN) (arg1 <= ti4_1); } RTHOOK(3); RTLE; RTEE; return Result; }

      and the Eiffel code class EL_SEQUENTIAL_INTERVALS inherit ARRAYED_LIST [INTEGER_64] rename lower as lower_index, upper as upper_index, count as item_count, extend as interval_extend, replace as replace_item redefine out end feature -- Status query item_has (n: INTEGER): BOOLEAN local l_item: like item do l_item := item Result := lower_integer (l_item) <= n and then n <= upper_integer (l_item) end correlates with the C code /* {EL_SEQUENTIAL_INTERVALS}.item_has */ EIF_BOOLEAN F1188_15493 (EIF_REFERENCE Current, EIF_INTEGER_32 arg1) { GTCX RTEX; EIF_INTEGER_64 loc1 = (EIF_INTEGER_64) 0; EIF_BOOLEAN Result = ((EIF_BOOLEAN) 0); RTLD; RTLI(1); RTLR(0,Current); RTEAA("item_has", 1187, Current, 1, 1, 21174); RTGC; RTHOOK(1); loc1 = F1178_5561(Current); RTHOOK(2); Result = '\0'; if ((EIF_BOOLEAN) (F1188_15499(Current, loc1) <= arg1)) { Result = (EIF_BOOLEAN) (arg1 <= F1188_15500(Current, loc1)); } RTHOOK(3); RTLE; RTEE; return Result; }

    • Finnian Reilly (8 years ago 6/11/2015)

      It might also be useful to compare the C output for this routine class EL_SEQUENTIAL_INTERVALS feature -- Element change extend (a_lower, a_upper: INTEGER) require interval_after_last: not is_empty implies a_lower > last.upper local l_item: EL_INTEGER_INTERVAL do l_item.set_lower (a_lower); l_item.set_upper (a_upper) interval_extend (l_item) end

      INTEGER_64 version

      /* {EL_SEQUENTIAL_INTERVALS}.extend */ void F1188_15494 (EIF_REFERENCE Current, EIF_INTEGER_32 arg1, EIF_INTEGER_32 arg2) { GTCX RTEX; EIF_INTEGER_64 ti8_1; EIF_INTEGER_64 ti8_2; RTLD; RTLI(1); RTLR(0,Current); RTEAA("extend", 1187, Current, 0, 2, 21175); RTGC; RTHOOK(1); ti8_1 = (EIF_INTEGER_64) arg1; ti8_1 = eif_bit_shift_left(ti8_1,((EIF_INTEGER_32) 32L)); ti8_2 = (EIF_INTEGER_64) arg2; ti8_1 = eif_bit_or(ti8_1,ti8_2); F1178_5597(Current, ti8_1); RTHOOK(2); RTLE; RTEE; }

      Expanded class version

      /* {EL_SEQUENTIAL_INTERVALS}.extend */ void F1219_15489 (EIF_REFERENCE Current, EIF_INTEGER_32 arg1, EIF_INTEGER_32 arg2) { GTCX RTEX; struct eif_ex_1200 sloc1; EIF_REFERENCE loc1 = (EIF_REFERENCE) sloc1.data; EIF_REFERENCE tr1 = NULL; RTLD; memset (&sloc1.overhead, 0, OVERHEAD + _OBJSIZ_0_0_0_2_0_0_0_0_); sloc1.overhead.ov_flags = EO_EXP | EO_STACK; RT_DFS(&sloc1.overhead, 6); RTLI(3); RTLR(0,loc1); RTLR(1,tr1); RTLR(2,Current); RTEAA("extend", 1218, Current, 1, 2, 21476); RTGC; RTHOOK(1); F7_11158(RTCV(loc1), arg1); RTHOOK(2); F7_11159(RTCV(loc1), arg2); RTHOOK(3); tr1 = RTRCL(loc1); F1218_5597(Current, tr1); RTHOOK(4); RTLE; RTEE; }