8
\$\begingroup\$

I write in C for several projects (e.g. learning / teaching,coding competitions, data processing) and frequently needarrays (e.g. strings) with fast appending / concatenation / reversing.

I've isolated the code below from a project. For simplicity,I assume all arguments are valid, including no pointer aliasing(for each function, no two arguments point to overlapping blocks of memory).I use a doubling strategy on reallocation.

I understand thatvector andstring from C++ likelyhave superior performance / features - this is for settings where a simple C solution is more accessible oreasier to modify for the specific context of the problem.

stringi.h

#include <assert.h>// contiguous dynamic array for fixed-size type T// note: v[] is not terminated (no '\0')typedef struct {    char *v; // T *v (testing: T = char)    size_t count; // count <= capacity    size_t capacity; // sizeof(v) / sizeof(T)} stringi_raw;// safe non-method mutations:// 1. modifying v[i] for indices i//      e.g. v[i] = 'a'; ++v[i];// 2. if n <= count, assigning n to count will keep// the first n elements; beware of underflow!//      e.g. while (count > 0) --count;// maximum capacity: capacity <= CAP_MAX// practical: CAP_MAX = SIZE_MAX / sizeof(T)static const size_t CAP_MAX = 987654321;// pointer to a stringi_rawtypedef stringi_raw *stringi;// initializes a stringi with 0 elements// to be deallocated by stringi_freestringi stringi_init();// frees memory used by target from the heapvoid stringi_free(stringi target);// allow n items to be stored without realloc// note: assumes n <= CAP_MAXvoid stringi_reserve(stringi target, size_t n);// appends c to the end of target// note: takes O(1) amortized timevoid stringi_put(stringi target, char c);// appends the contents of src to dstvoid stringi_cat(stringi dst, const stringi src);// reverses target->vvoid stringi_reverse(stringi target);// aka "reverse(src); cat(dst, src); reverse(src)"void stringi_cat_rev(stringi dst, const stringi src);

stringi.c

#include <stdlib.h>#include "stringi.h"stringi stringi_init(){    stringi target = malloc(sizeof(stringi_raw));    assert(target != NULL);    target->v = NULL; // realloc(NULL, x) = malloc(x)    target->count = 0;    target->capacity = 0;    return target;}void stringi_free(stringi target){    free(target->v); // free(NULL) is safe    free(target);}// sets the capacity of target to cap// note: assumes count <= cap <= CAP_MAXvoid stringi_set_cap(stringi target, size_t cap){    void *v = realloc(target->v, cap * sizeof(char));    assert(v != NULL);    target->v = v;    target->capacity = cap;}// get the next capacity from capsize_t double_or_max(size_t cap){    if (cap > CAP_MAX / 2)        return CAP_MAX;    return 2 * cap;}void stringi_reserve(stringi target, size_t n){    size_t cap = target->capacity;    if (n > cap)    {        if (cap == 0)            cap = 1;        else        {            cap = double_or_max(cap);            while (n > cap)                cap = double_or_max(cap);           }        stringi_set_cap(target, cap);    }}void stringi_put(stringi target, char c){    size_t n = target->count;    assert(n < CAP_MAX);    size_t n_fin = n + 1;    stringi_reserve(target, n_fin);        target->v[n] = c;    target->count = n_fin;}void stringi_cat(stringi dst, const stringi src){    size_t n_d = dst->count;    size_t n_s = src->count;    assert(n_d <= CAP_MAX - n_s);    size_t n_fin = n_d + n_s;    stringi_reserve(dst, n_fin);    for (size_t i = 0; i < n_s; ++i)        dst->v[n_d + i] = src->v[i];    dst->count = n_fin;}// if min <= max, reverse v[min] to v[max] inclusivevoid chars_reverse(char *v, size_t min, size_t max){    while (min < max)    {        char temp = v[min];        v[min] = v[max];        v[max] = temp;        ++min;        --max;    }}void stringi_reverse(stringi target){    // avoid underflow when count = 0    size_t count = target->count;    if (count > 0)        chars_reverse(target->v, 0, count - 1);}void stringi_cat_rev(stringi dst, const stringi src){    size_t n_d = dst->count;    size_t n_s = src->count;    assert(n_d <= CAP_MAX - n_s);    size_t n_fin = n_d + n_s;    stringi_reserve(dst, n_fin);    for (size_t i = 0; i < n_s; ++i)        dst->v[n_fin - 1 - i] = src->v[i];    dst->count = n_fin;}

Tests

#include <stdio.h>#include <limits.h> #include <string.h> #include <time.h> #include "stringi.h"void stringi_remove(stringi *target){    stringi_free(*target);    *target = NULL;}void stringi_print(stringi target){    printf("'%.*s' (%zu/%zu)\n",        (unsigned int) target->count, target->v,         target->count, target->capacity);}void stringi_validate(stringi target, char *expected){    size_t n = strlen(expected);    assert(n == target->count);    assert(0 == strncmp(expected, target->v, n));}int diff_to_ms(clock_t diff){    return diff * 1000 / CLOCKS_PER_SEC;}clock_t time_putchar(stringi target){    clock_t start = clock();    for (size_t i = 0; i < CAP_MAX; ++i)        stringi_put(target, 'c');    return clock() - start;}int main(){    assert(CAP_MAX >= 1);    assert(CAP_MAX <= SIZE_MAX / sizeof(char));    stringi s1 = stringi_init();    stringi_validate(s1, "");    printf("create: s1 = %p\n", s1);    printf("\nexpanding s1:\n");    stringi_reverse(s1);    stringi_validate(s1, "");    for (int i = 0; i < 13; ++i)    {        stringi_print(s1);        stringi_put(s1, 'a' + i);    }    stringi_validate(s1, "abcdefghijklm");    stringi_print(s1);    stringi s2 = stringi_init();    stringi_validate(s2, "");    printf("\ncreate: s2 = %p\n", s2);    for (int i = 13; i < 26; ++i)        stringi_put(s2, 'a' + i);    stringi_validate(s2, "nopqrstuvwxyz");    stringi_cat(s1, s2);    stringi_validate(s1,         "abcdefghijklmnopqrstuvwxyz");    stringi_reverse(s1);    stringi_validate(s1,        "zyxwvutsrqponmlkjihgfedcba");    s2->count = 2;    stringi_validate(s2, "no");    stringi_cat(s1, s2);    stringi_validate(s1,         "zyxwvutsrqponmlkjihgfedcbano");    stringi_cat_rev(s1, s2);    stringi_validate(s1,        "zyxwvutsrqponmlkjihgfedcbanoon");    stringi_remove(&s1);    printf("remove: s1 = %p\n", s1);    stringi_remove(&s2);    printf("remove: s2 = %p\n", s2);    s1 = stringi_init();    stringi_validate(s1, "");    printf("create: s1 = %p\n", s1);    s2 = stringi_init();    stringi_validate(s2, "");    printf("create: s2 = %p\n", s2);    int diff1 = diff_to_ms(time_putchar(s1));    printf("\ns1 += CAP_MAX * 'c': %d ms\n", diff1);    stringi_cat(s1, s2);    // expect error: exceeding CAP_MAX    // stringi_put(s1, 'c');    s1->count = 0;    stringi_validate(s1, "");    printf("clear s1:\n");    stringi_print(s1);    stringi_remove(&s1);    stringi_reserve(s2, CAP_MAX);    printf("\nreserve for s2: CAP_MAX\n");    int diff2 = diff_to_ms(time_putchar(s2));    printf("s2 += CAP_MAX * 'c': %d ms\n", diff2);    s2->count = 0;    stringi_validate(s2, "");    printf("clear s2:\n");    stringi_print(s2);    stringi_remove(&s2);    return 0;}

Output

create: s1 = 0000015E9D66F050expanding s1:'' (0/0)'a' (1/1)'ab' (2/2)'abc' (3/4)'abcd' (4/4)'abcde' (5/8)'abcdef' (6/8)'abcdefg' (7/8)'abcdefgh' (8/8)'abcdefghi' (9/16)'abcdefghij' (10/16)'abcdefghijk' (11/16)'abcdefghijkl' (12/16)'abcdefghijklm' (13/16)create: s2 = 0000015E9D66EEB0remove: s1 = 0000000000000000remove: s2 = 0000000000000000create: s1 = 0000015E9D66EF90create: s2 = 0000015E9D66F110s1 += CAP_MAX * 'c': 1120 msclear s1:'' (0/987654321)reserve for s2: CAP_MAXs2 += CAP_MAX * 'c': 931 msclear s2:'' (0/987654321)

I would appreciate any thoughts on performance, readability, or modifiability.

qwr's user avatar
qwr
1,2331 gold badge8 silver badges26 bronze badges
askedOct 3, 2024 at 3:03
Justin Chang's user avatar
\$\endgroup\$
13
  • 1
    \$\begingroup\$A short note, not worth an answer. Toby below pointed out your use ofassert is wrong. In the functionstringi_set_cap you have the comment "assumes count <= cap <= CAP_MAX". That assumption should be checked with an assert. That is exactly what asserts are for. It'll be there when debugging, so the user can see when they call your functions in the wrong way, but won't be there in production, to not slow down the function.\$\endgroup\$CommentedOct 3, 2024 at 15:41
  • 2
    \$\begingroup\$I understand that vector and string from C++ likely have superior performance - not for large reallocations,where this is about 2x faster with 70x fewer page faults. Crealloc is far superior to C++new/delete which omits any way to grow an existing allocation. Of course you need link-time optimization to inline across C source files to reduce per-push overhead since you put all your tiny functions in a separate.c instead of directly in the.h withstatic inline (orinline plus a.c withextern inline instantiation).\$\endgroup\$CommentedOct 4, 2024 at 4:47
  • \$\begingroup\$@PeterCordes Your comment intrigued me so much that I decided to benchmark vsstd::vector for 7 billionpush_back operations. The code above takes56176 ms without optimizations and18947 ms with optimizations, whilestd::vector takes485778 ms without optimizations and10968 ms with optimizations. Unfortunately, the optimized executable for my code appears to be causing a ton of branch mispredictions (I have no idea what magic causesstd::vector to avoid this).\$\endgroup\$CommentedOct 4, 2024 at 6:53
  • \$\begingroup\$Benchmarking without optimization is not interesting, especially for heavily templated C++ code that relies on a lot of inlining of tiny functions. (Idiomatic way of performance evaluation?). Did you compile with clang or gcc-flto orgcc -fwhole-program to allow cross-file inlining from yourstringi.c, if your benchmark was in a different file? Can you link your benchmark code, e.g. ongodbolt.org (with compiler version/options you actually used) so I can see what you did and maybe profile it on my own Skylake system? Also what CPU?\$\endgroup\$CommentedOct 4, 2024 at 7:24
  • 1
    \$\begingroup\$Anyway, your benchmark does a modulo with% 64 every iteration, which costs amovabs r64, imm64 and a 64-bitmul and some shift / LEA for each byte, which is maybe a similar amount of overhead to the bookkeeping to check if realloc is needed. I'm seeing negligible branch mispredicts withperf stat on Skylake, miss rate is 0.00%. IPC is 3.41 instructions per clock. It's not getting optimized away despite not doing anything with the results except read the count, at least not by GCC. Clang can be more aggressive about optimizing away alloc/free even for code that writes the to buffer.\$\endgroup\$CommentedOct 5, 2024 at 1:57

3 Answers3

13
\$\begingroup\$

I don't like this typedef:

// pointer to a stringi_rawtypedef stringi_raw *stringi;

In C, it's important to know whether any value is a pointer or a value object. Obscuring this distinction is harmful to my understanding.


Thisassert() is unfounded:

stringi target = malloc(sizeof(stringi_raw));assert(target != NULL);

Assertions exist to tell us something that'strue by design. However, we know thatmalloc()can return a null pointer, and that's something that we need to handle - even (or especially) in production builds when automatic checking of our assertions is disabled by settingNDEBUG. If our assertion is untrue, we proceed straight into undefined behaviour, and that's not something we should inflict on our users.


stringi_free() is safe when passed a string with no storage, but (unlikefree()), it can't be passed a null pointer. We could document that itstarget argument must be valid, but I think it's more helpful to users to just deal with null pointers:

void stringi_free(stringi target){    if (!target) { return; }    ⋮}

stringi.h includes<assert.h>. But that's not needed here - don't impose that cost on every translation unit that includesstringi.h.

Conversely, it is invalid if there isn't a definition ofsize_t in scope before it's included.

Ifstringi.h is included multiple times in a translation unit, I think we get errors from redefiningstringi_raw. And an include guard to avoid that problem.


Instringi.c, we have

#include <stdlib.h>#include "stringi.h"

This explains why we didn't spot the need forstringi.h to havesize_t in scope. If we include our header first, then that gives us confidence that it's complete and not dependent on any prior declarations.


The tests demonstrate the functionality, but always return a success status. Better tests areself-checking, returning failure status if any of the operations don't produce the expected result.

answeredOct 3, 2024 at 7:29
Toby Speight's user avatar
\$\endgroup\$
3
  • 2
    \$\begingroup\$Good answer. Note thatstringi_free() assumes that the pointer argument is not null, so it is coupled to the behaviors of theassert()s aftermalloc() andrealloc().\$\endgroup\$CommentedOct 3, 2024 at 16:29
  • 2
    \$\begingroup\$Appreciate the answer. I will likely alter the design so that it returns NULL for failedstringi_init() and int error codes for failed extending, leaving theasserts to the users. This would also make it easier to test failures by assert. And the order of includes is a great catch.\$\endgroup\$CommentedOct 3, 2024 at 17:03
  • 1
    \$\begingroup\$Thanks @Nayuki - I've added a note aboutstringi_free().\$\endgroup\$CommentedOct 10, 2024 at 7:15
10
\$\begingroup\$

A few small items:

  • stringi stringi_init(): In C, anempty parameter list means any number of arguments, not zero arguments. Change this tostringi stringi_init(void). (This is not necessary in C++ or starting in C23.)

  • void chars_reverse(char *v, size_t min, size_t max): This function is only used within stringi.c, so make it "private" by addingstatic in front, like:static void chars_reverse(...).

  • void stringi_cat(...): Replace the for-loop withmemcpy():

    memcpy(dst->v + n_d, src->v, n_s);
  • void stringi_reserve(...): Use a do-while loop:

    do cap = double_or_max(cap);while (n > cap);
  • It seems like the canonical definition ofsize_t exists instddef.h.

answeredOct 3, 2024 at 15:30
Nayuki's user avatar
\$\endgroup\$
1
  • \$\begingroup\$Appreciate it! The do while is a lot cleaner.\$\endgroup\$CommentedOct 3, 2024 at 17:55
4
\$\begingroup\$

Review

You've named a few possibilities, but, really, where do you think you will actually use these functions (as a group)?

Every C programmerknows that C strings end with NUL terminators. These don't. These represent a reversion to Pascal conventions (each 'string' was prefixed with a hidden byte count only the compiler and Pascal libraries knew about.) C is not Pascal, and stepping away from this precept voids the utility ofmany tried and tested C Standard Library functions.

Example:
printf( "%s\n", "foobar" );
becomes
printf( "%.*s\n", ptr->len, (char*)ptr->val );
Animprovement? Workable??Andputs() is no longer useful, at all!

In C, a typical application involving lots of strings might be a "dictionary" whose individual words are accessible via an array of pointers...
With a modern compiler,sizeof( char * ) is typically 8 bytes.
The per-entry overhead of this proposal is (likely) 24 bytes!

I have a small English wordlist (~8000 entries of varying length). Running a quick test on that sample, a conventional array of pointers into a block of ragged length C strings required about 90,000 bytes of storage. Summing 24 bytes of overhead with the 'rounded up' allocation requirements for each of these 'Pascal strings' was twice as much... In blocks of powers of 2, the requirement is always the worst of its cohort. This is not good.

Plus, there is no demonstrated awareness of the overheads ofmalloc() adding to the overheads of the "string fragments" represented by this code. Where's the advantage?


reVerSe vs reSerVe

Sorry, but I'm annoyed that these two different operations, with their too-similar spellings, have been "packaged-up" together.

How useful isreverse(), really?
It's such a tiny & simple function (unrelated to memory allocation) that I'd feel better if it was not present in this collection.

Various SO questions, in the past, used "reverse in place" that tookfirst andlast parameters toreverse substrings within a given string. This had more utility, imo.


Summary

Without having made careful evaluation of every line of code, it looks like most of this package is nicely laid-out, readable and probably does what it needs to do.

However... Looking back over many years of programming, I cannot recall ever needing the functionality ofstringi_catrev(). (Where is its partner_prfrev() that reverses the first string before appending the second string to it? Wouldn't the aforementioned (simple) 'reverse substring' be able to replace some of this?)

Most of these functions are little more than 'aliases'; wrappers for conventional C functions familiar to all who've already learned the language.

answeredJan 21 at 9:08
\$\endgroup\$

You mustlog in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.