From d50b41acac3823ff00e970758183aa22695c6ff5 Mon Sep 17 00:00:00 2001
From: Abdelrahman
Date: Sat, 15 Feb 2025 23:45:07 +0000
Subject: [PATCH] Add str8 split, rsplit, join, concat and concat_capped functions

---
 src/core/strings/str8/str8.c | 221 ++++++++++++++++++++++++++++++++++-
 src/core/strings/str8/str8.h |  15 ++-
 2 files changed, 232 insertions(+), 4 deletions(-)

diff --git a/src/core/strings/str8/str8.c b/src/core/strings/str8/str8.c
index bcb3caf..a3189a0 100644
--- a/src/core/strings/str8/str8.c
+++ b/src/core/strings/str8/str8.c
@@ -20,7 +20,7 @@ Str8 *wapp_str8_buf_alloc(const Allocator *allocator, u64 capacity) {
 
   c8 *buf = wapp_mem_allocator_alloc(allocator, sizeof(c8) * capacity);
   if (!buf) {
-    wapp_mem_allocator_free(allocator, (void **)&str);
+    wapp_mem_allocator_free(allocator, (void **)&str, sizeof(Str8));
     goto RETURN_STR8;
   }
 
@@ -111,6 +111,57 @@ Str8RO wapp_str8_substr(Str8RO *str, u64 start, u64 end) {
   };
 }
 
+/**
+ * Appends `src` to `dst`. If `dst` has enough spare capacity, the copy happens
+ * in place and `dst` is returned. Otherwise a new string with capacity
+ * `dst->capacity + src->size` is allocated, filled with `dst` followed by
+ * `src`, and returned; `dst` itself is not modified. Returns NULL on a NULL
+ * argument or if the allocation fails.
+ */
+Str8 *wapp_str8_concat(const Allocator *allocator, Str8 *dst, Str8RO *src) {
+  if (!allocator || !dst || !src) {
+    return NULL;
+  }
+
+  Str8 *output = NULL;
+  u64 remaining = dst->capacity - dst->size;
+  if (src->size <= remaining) {
+    output = dst;
+    goto COPY_STRING_STR8_CONCAT;
+  }
+
+  u64 capacity = dst->capacity + src->size;
+
+  output = wapp_str8_buf_alloc(allocator, capacity);
+  if (!output) {
+    goto RETURN_STR8_CONCAT;
+  }
+
+  wapp_str8_concat_capped(output, dst);
+
+COPY_STRING_STR8_CONCAT:
+  wapp_str8_concat_capped(output, src);
+
+RETURN_STR8_CONCAT:
+  return output;
+}
+
+/**
+ * Appends as much of `src` as fits into the remaining capacity of `dst`; any
+ * excess is dropped. Does nothing if either argument is NULL.
+ */
+void wapp_str8_concat_capped(Str8 *dst, Str8RO *src) {
+  if (!dst || !src) {
+    return;
+  }
+
+  u64 remaining = dst->capacity - dst->size;
+  u64 to_copy = remaining < src->size ? remaining : src->size;
+
+  memcpy(dst->buf + dst->size, src->buf, to_copy);
+  dst->size += to_copy;
+}
+
 i64 wapp_str8_find(Str8RO *str, Str8RO substr) {
   if (substr.size > str->size) {
     return -1;
@@ -141,7 +192,173 @@ i64 wapp_str8_rfind(Str8RO *str, Str8RO substr) {
   return -1;
 }
 
-Str8Node *wapp_str8_list_get(Str8List *list, u64 index) {
+/**
+ * Adds a freshly allocated copy of `piece` to `list`, at the front when `front`
+ * is true and at the back otherwise. If either the node or the copy cannot be
+ * allocated, the piece is skipped and the list is left unchanged.
+ */
+static void str8_list_add_copy(const Allocator *allocator, Str8List *list, Str8RO *piece, bool front) {
+  // Allocate the node first: it is one allocation of known size, so it can be
+  // released again if the string copy fails.
+  Str8Node *node = wapp_mem_allocator_alloc(allocator, sizeof(Str8Node));
+  if (!node) {
+    return;
+  }
+
+  node->string = wapp_str8_alloc_str8(allocator, piece);
+  if (!node->string) {
+    wapp_mem_allocator_free(allocator, (void **)&node, sizeof(Str8Node));
+    return;
+  }
+
+  if (front) {
+    wapp_str8_list_push_front(list, node);
+  } else {
+    wapp_str8_list_push_back(list, node);
+  }
+}
+
+/**
+ * Splits `str` at occurrences of `delimiter`, scanning from the start, and
+ * returns the pieces as a newly allocated list of string copies.
+ *
+ * A positive `max_splits` caps the number of splits; zero or a negative value
+ * (wapp_str8_split passes -1) means no limit. Everything after the last split
+ * becomes the final element. If `delimiter` is empty or longer than `str`, the
+ * list holds a single copy of `str`.
+ *
+ * Returns NULL on a NULL argument or if the list cannot be allocated. A piece
+ * whose node or copy cannot be allocated is left out of the list.
+ */
+Str8List *wapp_str8_split_with_max(const Allocator *allocator, Str8RO *str, Str8RO *delimiter, i64 max_splits) {
+  if (!allocator || !str || !delimiter) {
+    return NULL;
+  }
+
+  // NOTE(review): the list is used as allocated; presumably the allocator
+  // returns zeroed memory so it starts out empty - confirm.
+  Str8List *output = wapp_mem_allocator_alloc(allocator, sizeof(Str8List));
+  if (!output) {
+    return NULL;
+  }
+
+  // An empty delimiter would let the scan match without advancing `start` and
+  // loop forever; treat it like a delimiter that cannot occur.
+  if (delimiter->size == 0 || delimiter->size > str->size) {
+    str8_list_add_copy(allocator, output, str, false);
+    goto RETURN_STR8_SPLIT;
+  }
+
+  u64 start = 0;
+  i64 splits = 0;
+
+  while (max_splits <= 0 || splits < max_splits) {
+    // Search the unsplit tail via wapp_str8_substr rather than a heap copy.
+    Str8RO rest = wapp_str8_substr(str, start, str->size);
+    i64 pos = wapp_str8_find(&rest, *delimiter);
+    if (pos < 0) {
+      break;
+    }
+
+    Str8RO before = wapp_str8_substr(str, start, start + (u64)pos);
+    str8_list_add_copy(allocator, output, &before, false);
+
+    start += (u64)pos + delimiter->size;
+    ++splits;
+  }
+
+  Str8RO last = wapp_str8_substr(str, start, str->size);
+  str8_list_add_copy(allocator, output, &last, false);
+
+RETURN_STR8_SPLIT:
+  return output;
+}
+
+/**
+ * Same as wapp_str8_split_with_max, but scans from the end of `str`, so a
+ * positive `max_splits` keeps the rightmost pieces separate and leaves the
+ * unsplit remainder as the first element. Pieces keep their original order.
+ */
+Str8List *wapp_str8_rsplit_with_max(const Allocator *allocator, Str8RO *str, Str8RO *delimiter, i64 max_splits) {
+  if (!allocator || !str || !delimiter) {
+    return NULL;
+  }
+
+  // NOTE(review): the list is used as allocated; presumably the allocator
+  // returns zeroed memory so it starts out empty - confirm.
+  Str8List *output = wapp_mem_allocator_alloc(allocator, sizeof(Str8List));
+  if (!output) {
+    return NULL;
+  }
+
+  // An empty delimiter would let the scan match without shrinking `end` and
+  // loop forever; treat it like a delimiter that cannot occur.
+  if (delimiter->size == 0 || delimiter->size > str->size) {
+    str8_list_add_copy(allocator, output, str, false);
+    goto RETURN_STR8_SPLIT;
+  }
+
+  // Exclusive end of the part of `str` that has not been split yet.
+  u64 end = str->size;
+  i64 splits = 0;
+
+  while (max_splits <= 0 || splits < max_splits) {
+    Str8RO rest = wapp_str8_substr(str, 0, end);
+    i64 pos = wapp_str8_rfind(&rest, *delimiter);
+    if (pos < 0) {
+      break;
+    }
+
+    // Bound the piece by `end`, not by the full length of `str`.
+    Str8RO after = wapp_str8_substr(str, (u64)pos + delimiter->size, end);
+    str8_list_add_copy(allocator, output, &after, true);
+
+    end = (u64)pos;
+    ++splits;
+  }
+
+  Str8RO last = wapp_str8_substr(str, 0, end);
+  str8_list_add_copy(allocator, output, &last, true);
+
+RETURN_STR8_SPLIT:
+  return output;
+}
+
+/**
+ * Joins the strings of `list`, in order, into one newly allocated string with
+ * `delimiter` inserted between consecutive elements. Returns NULL on a NULL
+ * argument or if the output string cannot be allocated.
+ */
+Str8 *wapp_str8_join(const Allocator *allocator, const Str8List *list, Str8RO *delimiter) {
+  if (!allocator || !list || !delimiter) {
+    return NULL;
+  }
+
+  // An empty list has no gaps between elements; computing `node_count - 1`
+  // for it would wrap around and request an enormous capacity.
+  u64 gaps = list->node_count > 0 ? list->node_count - 1 : 0;
+  u64 capacity = list->total_size + (delimiter->size * gaps);
+  Str8 *output = wapp_str8_buf_alloc(allocator, capacity);
+  if (!output) {
+    return NULL;
+  }
+
+  for (u64 i = 0; i < list->node_count; ++i) {
+    Str8Node *node = wapp_str8_list_get(list, i);
+    if (!node) {
+      break;
+    }
+
+    wapp_str8_concat_capped(output, node->string);
+    if (i + 1 < list->node_count) {
+      wapp_str8_concat_capped(output, delimiter);
+    }
+  }
+
+  return output;
+}
+
+Str8Node *wapp_str8_list_get(const Str8List *list, u64 index) {
   if (index >= list->node_count) {
     return NULL;
   }
diff --git a/src/core/strings/str8/str8.h b/src/core/strings/str8/str8.h
index c1fbd8d..55d10bc 100644
--- a/src/core/strings/str8/str8.h
+++ b/src/core/strings/str8/str8.h
@@ -68,6 +68,8 @@ c8 wapp_str8_get(Str8RO *str, u64 index);
 void wapp_str8_set(Str8 *str, u64 index, c8 c);
 bool wapp_str8_equal(Str8RO *s1, Str8RO *s2);
 Str8RO wapp_str8_substr(Str8RO *str, u64 start, u64 end);
+Str8 *wapp_str8_concat(const Allocator *allocator, Str8 *dst, Str8RO *src);
+void wapp_str8_concat_capped(Str8 *dst, Str8RO *src);
 
 /**
  * Str8 find functions
@@ -76,9 +78,18 @@ i64 wapp_str8_find(Str8RO *str, Str8RO substr);
 i64 wapp_str8_rfind(Str8RO *str, Str8RO substr);
 
 /**
- * Str8 list functions
+ * Str8 split and join
  */
-Str8Node *wapp_str8_list_get(Str8List *list, u64 index);
+#define wapp_str8_split(ALLOCATOR, STR, DELIMITER) wapp_str8_split_with_max(ALLOCATOR, STR, DELIMITER, -1)
+#define wapp_str8_rsplit(ALLOCATOR, STR, DELIMITER) wapp_str8_rsplit_with_max(ALLOCATOR, STR, DELIMITER, -1)
+Str8List *wapp_str8_split_with_max(const Allocator *allocator, Str8RO *str, Str8RO *delimiter, i64 max_splits);
+Str8List *wapp_str8_rsplit_with_max(const Allocator *allocator, Str8RO *str, Str8RO *delimiter, i64 max_splits);
+Str8 *wapp_str8_join(const Allocator *allocator, const Str8List *list, Str8RO *delimiter);
+
+/**
+ * Str8 list utilities
+ */
+Str8Node *wapp_str8_list_get(const Str8List *list, u64 index);
 void wapp_str8_list_push_front(Str8List *list, Str8Node *node);
 void wapp_str8_list_push_back(Str8List *list, Str8Node *node);
 void wapp_str8_list_insert(Str8List *list, Str8Node *node, u64 index);