String functions in C language that are not present in standard C library.

From: Amit
Date: Mon Apr 25 2022 - 03:35:36 EST


Hi,

I have written a few functions for manipulating strings that are not
present in the standard C library.

I am sending them to the Linux kernel mailing list, in case the Linux kernel
needs them.

The main functions are:

## char *get_input_from_stdin_and_discard_extra_characters(char
*str, long size);
## char **str_split(const char *str, const char *delim, long max_splits);
## char *str_join(unsigned int skip_null_and_empty_input_strings,
const char *delim, long num_args, ...);
## char *substr(const char *str, long start_index, long end_index);

--------------------
string_library.c
--------------------


/*
* Author: Amit Choudhary
*
* Email: amitchoudhary0523 at gmail dot com
*
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

#include "string_library.h"

/* The description of functions in this file is in the header file
"string_library.h". */

static void free_all_allocated_memory(char **strings_array, long n);
static char **transform_str_to_string_array(const char *str);

/*
 * Reads one line from stdin into 'str'.
 *
 * str:  destination buffer of at least 'size' bytes (may be NULL only when
 *       'size' is 0).
 * size: capacity of 'str' in bytes, including the terminating null byte.
 *
 * Behaviour:
 *   - size == 0: the rest of the current input line is read and thrown
 *     away; NULL is returned ('str' is not looked at).
 *   - str == NULL or size < 0: nothing is read and NULL is returned.
 *   - otherwise at most 'size - 1' characters are stored, reading stops at
 *     the first newline or EOF (neither is stored), 'str' is
 *     null-terminated, and any characters left on the line are discarded.
 *
 * Returns 'str' on success, NULL in the cases listed above.
 */
char *get_input_from_stdin_and_discard_extra_characters(char *str, long size)
{
    int c = 0;
    long i = 0;

    if (size == 0) {
        /*
         * Drain the line. Only newline and EOF end the line; a NUL byte in
         * the input is ordinary data and must not stop the draining early.
         */
        do {
            c = getchar();
        } while ((c != '\n') && (c != EOF));
        return NULL;
    }

    if (!str)
        return NULL;

    if (size < 0)
        return NULL;

    for (i = 0; i < (size - 1); i++) {
        c = getchar();

        if ((c == '\n') || (c == EOF)) {
            /* Line ended before the buffer filled up: nothing to discard. */
            str[i] = '\0';
            return str;
        }

        str[i] = (char)c;
    }

    str[i] = '\0';

    /* Buffer is full: drop whatever is left of the line (see note above). */
    do {
        c = getchar();
    } while ((c != '\n') && (c != EOF));

    return str;
} // end of get_input_from_stdin_and_discard_extra_characters

/*
 * Splits 'str' on every occurrence of the string 'delim' and returns a
 * NULL-terminated, heap-allocated array of heap-allocated tokens.
 *
 * - NULL or empty 'str' yields NULL.
 * - NULL/empty 'delim', 'max_splits' of 0, or a 'delim' that never occurs
 *   yields a one-token array holding a copy of 'str'.
 * - A positive 'max_splits' caps the number of splits; the final token then
 *   holds the unsplit remainder of 'str'. A negative value means no cap.
 * - NULL is also returned when memory cannot be allocated.
 */
char **str_split(const char *str, const char *delim, long max_splits)
{
    const char *cursor = NULL;
    const char *token_start = NULL;
    const char *hit = NULL;
    char **tokens = NULL;
    size_t delim_len = 0;
    size_t token_len = 0;
    long token_count = 1; /* the text after the last delimiter */
    long filled = 0;

    if ((str == NULL) || (*str == '\0'))
        return NULL;

    /* Every situation that cannot split degenerates to a copy of 'str'. */
    if ((delim == NULL) || (*delim == '\0') || (max_splits == 0)
        || (strstr(str, delim) == NULL))
        return transform_str_to_string_array(str);

    delim_len = strlen(delim);

    /* Pass 1: each delimiter occurrence contributes one more token. */
    for (cursor = strstr(str, delim); cursor != NULL;
         cursor = strstr(cursor + delim_len, delim))
        token_count++;

    if ((max_splits > 0) && (max_splits < token_count))
        token_count = max_splits + 1;

    /* One extra slot holds the terminating NULL pointer. */
    tokens = calloc((size_t)token_count + 1, sizeof(*tokens));
    if (tokens == NULL)
        return NULL;

    /* Pass 2: copy the tokens out one by one. */
    token_start = str;
    for (filled = 0; filled < token_count; filled++) {

        /*
         * All tokens but the last end at the next delimiter; the last one
         * runs to the end of 'str' (and may contain further delimiters
         * when 'max_splits' capped the split count).
         */
        hit = (filled < token_count - 1) ? strstr(token_start, delim) : NULL;
        token_len = (hit != NULL) ? (size_t)(hit - token_start)
                                  : strlen(token_start);

        tokens[filled] = malloc(token_len + 1);
        if (tokens[filled] == NULL) {
            /* Releases tokens[0 .. filled-1] and the array itself. */
            free_all_allocated_memory(tokens, filled);
            return NULL;
        }

        memcpy(tokens[filled], token_start, token_len);
        tokens[filled][token_len] = '\0';

        if (hit != NULL)
            token_start = hit + delim_len;
    }

    tokens[filled] = NULL;

    return tokens;
} // end of str_split

/*
 * static char **transform_str_to_string_array(const char *str):
 *
 * Wraps a copy of 'str' in a freshly allocated two-element array of
 * strings: element 0 points to a heap copy of 'str' and element 1 is the
 * terminating NULL pointer. 'str' must not be NULL.
 *
 * Returns NULL if either allocation fails (nothing is leaked in that case).
 *
 * This is a static function and must not be called from outside this file.
 */
static char **transform_str_to_string_array(const char *str)
{
    const size_t copy_size = strlen(str) + 1; /* text plus null byte */
    char **single = calloc(2, sizeof(*single)); /* token + NULL terminator */

    if (single == NULL)
        return NULL;

    single[0] = malloc(copy_size);
    if (single[0] == NULL) {
        free(single);
        return NULL;
    }

    /* Copying 'copy_size' bytes brings the terminating null byte along. */
    memcpy(single[0], str, copy_size);
    single[1] = NULL;

    return single;
} // end of transform_str_to_string_array

/*
 * static void free_all_allocated_memory(char **strings_array, long n):
 *
 * Frees the first 'n' elements of 'strings_array' and then the array
 * itself. A NULL 'strings_array' is ignored. Unlike free_strings_array(),
 * this does not rely on a NULL terminator, so it can clean up an array
 * that is only partially filled.
 *
 * This is a static function and must not be called from outside this file.
 */
static void free_all_allocated_memory(char **strings_array, long n)
{
    long idx = 0;

    if (strings_array != NULL) {
        while (idx < n) {
            free(strings_array[idx]);
            idx++;
        }

        free(strings_array);
    }
} // end of free_all_allocated_memory

/*
 * Prints every string of the NULL-terminated 'strings_array' on its own
 * line, wrapped in single quotes, between a start and an end banner.
 * A NULL 'strings_array' prints just the banners.
 */
void print_strings_array(char **strings_array)
{
    char **entry = strings_array;

    printf("Tokens are printed below (within single quotes):\n\n");
    printf("---- Start of Tokens ----\n");

    if (entry != NULL) {
        for (; *entry != NULL; entry++)
            printf("'%s'\n", *entry);
    }

    printf("---- End of Tokens ----\n\n");
} // end of print_strings_array

/*
 * Frees every string of the NULL-terminated 'strings_array' and then the
 * array itself. A NULL 'strings_array' is ignored.
 */
void free_strings_array(char **strings_array)
{
    char **entry = NULL;

    if (strings_array == NULL)
        return;

    /* Walk up to (not including) the NULL terminator. */
    for (entry = strings_array; *entry != NULL; entry++)
        free(*entry);

    free(strings_array);
} // end of free_strings_array

/*
 * Counts the strings in the NULL-terminated 'strings_array' (the NULL
 * terminator itself is not counted). Returns 0 for a NULL array.
 */
long get_number_of_strings_in_strings_array(char **strings_array)
{
    long count = 0;

    if (strings_array != NULL) {
        for (count = 0; strings_array[count] != NULL; count++) {
            /* counting only */
        }
    }

    return count;
} // end of get_number_of_strings_in_strings_array

/*
 * Concatenates 'num_args' strings passed as variable arguments (each a
 * "char *"), inserting 'delim' between consecutive joined strings.
 *
 * skip_null_and_empty_input_strings:
 *     non-zero - NULL and empty arguments are ignored entirely (no 'delim'
 *                is emitted for them);
 *     zero     - NULL and empty arguments count as empty strings, so they
 *                still get a 'delim' next to them.
 * delim:    separator; NULL is treated like an empty separator.
 * num_args: number of variable arguments that follow.
 *
 * Returns a newly allocated, null-terminated string that the caller must
 * free. Returns NULL when 'num_args' <= 0, when the joined result would be
 * empty (total length 0), or when memory cannot be allocated.
 */
char *str_join(unsigned int skip_null_and_empty_input_strings,
               const char *delim, long num_args, ...)
{
    va_list valist;
    const char *piece = NULL;
    char *joined = NULL;
    size_t piece_len = 0;
    size_t delim_len = 0;
    size_t total_len = 0;
    size_t pos = 0; /* write position inside 'joined' */
    long pieces_used = 0;
    long delims_left = 0;
    long i = 0;

    if (num_args <= 0)
        return NULL;

    if (delim)
        delim_len = strlen(delim);

    /* Pass 1: work out how many bytes the result needs. */
    va_start(valist, num_args);
    for (i = 0; i < num_args; i++) {
        piece = va_arg(valist, char *);
        piece_len = (piece != NULL) ? strlen(piece) : 0;

        if (skip_null_and_empty_input_strings && (piece_len == 0))
            continue;

        total_len += piece_len;
        if (pieces_used > 0)
            total_len += delim_len; /* separator before this piece */
        pieces_used++;
    }
    va_end(valist);

    if (total_len == 0)
        return NULL;

    joined = malloc(total_len + 1); /* 1 extra for terminating null byte */
    if (!joined)
        return NULL;

    /* Pass 2: copy pieces, with a separator after all but the last one. */
    delims_left = pieces_used - 1;
    va_start(valist, num_args);
    for (i = 0; i < num_args; i++) {
        piece = va_arg(valist, char *);
        piece_len = (piece != NULL) ? strlen(piece) : 0;

        if (skip_null_and_empty_input_strings && (piece_len == 0))
            continue;

        /*
         * Only copy when there is something to copy: 'piece' or 'delim'
         * may be NULL here, and handing a null pointer to memcpy/memmove
         * is undefined behaviour even for a length of 0.
         */
        if (piece_len > 0) {
            memcpy(joined + pos, piece, piece_len);
            pos += piece_len;
        }

        if (delims_left > 0) {
            if (delim_len > 0) {
                memcpy(joined + pos, delim, delim_len);
                pos += delim_len;
            }
            delims_left--;
        }
    }
    va_end(valist);

    joined[pos] = '\0';

    return joined;
} // end of str_join

/*
 * Returns a newly allocated copy of the characters of 'str' from
 * 'start_index' to 'end_index', both inclusive, terminated by a null byte.
 *
 * Returns NULL when 'str' is NULL or empty, when either index is negative,
 * when 'end_index' is less than 'start_index', when either index lies past
 * the last character of 'str', or when memory cannot be allocated.
 *
 * The caller must free the returned pointer.
 */
char *substr(const char *str, long start_index, long end_index)
{
    long last_index = 0;
    size_t count = 0;
    char *copy = NULL;

    if ((str == NULL) || (*str == '\0'))
        return NULL;

    /*
     * With start_index >= 0 and end_index >= start_index established here,
     * end_index cannot be negative either.
     */
    if ((start_index < 0) || (end_index < start_index))
        return NULL;

    /* start_index <= end_index, so bounding end_index bounds both. */
    last_index = (long)strlen(str) - 1;
    if (end_index > last_index)
        return NULL;

    count = (size_t)(end_index - start_index) + 1;

    copy = malloc(count + 1); /* extra 1 byte for null byte */
    if (copy == NULL)
        return NULL;

    memcpy(copy, str + start_index, count);
    copy[count] = '\0';

    return copy;
} // end of substr

-------------------
string_library.h
-------------------


/*
* Author: Amit Choudhary
*
* Email: amitchoudhary0523 at gmail dot com
*
*/

#ifndef _STRING_LIBRARY_H_
#define _STRING_LIBRARY_H_

/*
 * get_input_from_stdin_and_discard_extra_characters(char *str, long size):
 *
 * Function get_input_from_stdin_and_discard_extra_characters() reads at most
 * 'size - 1' characters into 'str' from stdin and then appends the null
 * character ('\0').
 *
 * If 'size' is 0 then this function will discard all input and return NULL;
 * 'str' is not examined in that case. So, to discard all input, this
 * function can be called with 'str' having value NULL and 'size' having
 * value 0.
 *
 * If 'size' is not 0 and 'str' is NULL, or if 'size' is negative, then
 * nothing is read from stdin and NULL is returned.
 *
 * In all other cases, reading input stops after encountering a newline
 * ('\n') or EOF even if 'size - 1' characters have not been read. The
 * newline ('\n') or EOF is not stored; the null character ('\0') is written
 * in its place. If there are extra characters in input, they are read and
 * discarded.
 *
 * In all cases, 'str' or NULL is returned.
 */
char *get_input_from_stdin_and_discard_extra_characters(char *str, long size);

/*
 * char **str_split(const char *str, const char *delim, long max_splits):
 *
 * Function str_split() splits a string ('str') into tokens. It uses the
 * 'delim' string to split 'str' into tokens. If a 'delim' is found at
 * position "i", then the token ends at position "i - 1".
 *
 * If there are "n" 'delim' in 'str' then "n + 1" tokens are
 * generated/returned. However, some or all of these tokens may be empty
 * strings. For example, if 'str' contains only a single 'delim' then two
 * empty tokens are generated.
 *
 * The reason that empty tokens are returned is that some users may want
 * empty tokens. One use case is that, if they are splitting records from a
 * file to insert in a database, then when an empty token is found, they can
 * insert NULL value or 0 or empty string, etc. in that column.
 *
 * Users who don't want empty tokens can skip them by testing which token is
 * empty and which is not.
 *
 * The return value of this function is a pointer to pointer to character
 * (means a pointer to an array of strings/elements). This array of strings
 * is terminated by a NULL pointer/string/element which means that the last
 * element in this array of strings is a NULL pointer/string. So, you can
 * loop through this array of strings until you get a NULL pointer/string.
 *
 * The code of looping through this array of strings is:
 *
 *     long i = 0;
 *     while (strings_array[i]) {
 *         ..do stuff here..
 *         i = i + 1;
 *     }
 *
 * The above can be achieved using a for loop also:
 *
 *     long i = 0;
 *     for (i = 0; strings_array[i]; i = i + 1) {
 *         ..do stuff here..
 *     }
 *
 * If you want to skip the empty tokens then the following would be the code
 * for looping through this array of strings:
 *
 *     long i = 0;
 *     while (strings_array[i]) {
 *         if (!*(strings_array[i])) {
 *             i = i + 1;
 *             continue;
 *         }
 *         ..do stuff here..
 *         i = i + 1;
 *     }
 *
 * The above can be achieved using a for loop also:
 *
 *     long i = 0;
 *     for (i = 0; strings_array[i]; i = i + 1) {
 *         if (!*(strings_array[i]))
 *             continue;
 *         ..do stuff here..
 *     }
 *
 * If 'str' is NULL or empty then NULL is returned. NULL is also returned if
 * memory was not available. To find out what exactly happened, the user can
 * check whether 'str' is NULL or empty. In case 'str' is neither NULL nor
 * empty then it means that memory was not available.
 *
 * 'max_splits' argument is used to control how many times 'str' should be
 * split. If 'max_splits' is positive and less than the number of tokens
 * that would ideally be generated then the number of tokens is reduced to
 * "max_splits + 1"; the last token then contains the whole unsplit
 * remainder of 'str', including any 'delim' strings in it. If 'max_splits'
 * is negative then it means that all tokens should be returned.
 *
 * If 'max_splits' is 0 or 'delim' is NULL or empty string or 'delim' is not
 * found in 'str' then an array of strings is returned which will have two
 * elements - the first element will be a pointer to a copy of 'str' and the
 * second element will be a NULL pointer/string/element.
 *
 * The returned array and every string in it are allocated on the heap
 * (using calloc/malloc), so it is the user's responsibility to free this
 * memory. The user can use the function free_strings_array() to free the
 * strings_array returned by this function.
 */
char **str_split(const char *str, const char *delim, long max_splits);

/*
 * void print_strings_array(char **strings_array):
 *
 * Function print_strings_array() prints to stdout all the string elements
 * of 'strings_array', one per line and each within single quotes, between
 * a "Start of Tokens" and an "End of Tokens" line. 'strings_array' must be
 * terminated by a NULL pointer/string/element. If 'strings_array' is NULL
 * then only the start and end lines are printed.
 */
void print_strings_array(char **strings_array);

/*
 * void free_strings_array(char **strings_array):
 *
 * Function free_strings_array() frees all the string elements of
 * 'strings_array' up to its terminating NULL pointer/string/element.
 * It also frees 'strings_array'. If 'strings_array' is NULL then this
 * function does nothing.
 */
void free_strings_array(char **strings_array);

/*
 * long get_number_of_strings_in_strings_array(char **strings_array):
 *
 * Function get_number_of_strings_in_strings_array() returns the count of
 * elements in 'strings_array', not counting the terminating NULL
 * pointer/string/element. It is assumed that this array of strings is
 * terminated by a NULL pointer/string/element. If 'strings_array' is NULL
 * then 0 is returned.
 */
long get_number_of_strings_in_strings_array(char **strings_array);

/*
 * char *str_join(unsigned int skip_null_and_empty_input_strings,
 *                const char *delim, long num_args, ...):
 *
 * Parameters:
 *
 *      skip_null_and_empty_input_strings: see the description below.
 *      delim: string that is placed between the joined strings. It may be
 *             NULL or empty, in which case nothing is placed between them.
 *      num_args: number of variable arguments that are passed to this
 *                function excluding the 'delim' string.
 *      ...: Variable number of "char *" pointers.
 *
 * Description:
 *
 *      Function str_join() concatenates all the strings/character arrays
 *      passed to it. If 'delim' is not NULL and not empty then between every
 *      two strings, the 'delim' string is concatenated.
 *
 *      If skip_null_and_empty_input_strings is zero then this means that
 *      NULL/empty strings should be considered valid strings for the purpose
 *      of concatenating 'delim' string - this means that if there is a
 *      NON-NULL/non-empty string in the variable arguments list which is then
 *      followed or preceded by a NULL/empty string then one 'delim' string
 *      will be concatenated between NON-NULL/non-empty string and NULL/empty
 *      string. This can be useful in case columns of a database are
 *      concatenated to form a record which will then be written in a file -
 *      so here, a column containing NULL/empty value will be represented as
 *      empty by having two consecutive 'delim' strings.
 *
 *      If skip_null_and_empty_input_strings is non-zero then this means that
 *      NULL/empty strings should be skipped and no 'delim' string should be
 *      concatenated for them.
 *
 *      Function str_join() allocates a new character array whose size is
 *      equal to the sum of the lengths of all the strings that are joined,
 *      plus the lengths of the 'delim' strings placed between them, plus 1
 *      (extra 1 for terminating null byte). It then concatenates all the
 *      strings passed to it (these strings are separated by 'delim' string
 *      but please see above for NULL/empty strings) into the newly allocated
 *      character array and then returns the pointer to the newly allocated
 *      character array.
 *
 *      NULL is returned if 'num_args' is less than or equal to 0, if the
 *      joined result would have a length of 0 (for example, when every
 *      string is NULL/empty and is skipped), or if memory allocation fails.
 *
 *      It is the responsibility of the caller to free the allocated memory
 *      (that is, to free the returned pointer from this function).
 */
char *str_join(unsigned int skip_null_and_empty_input_strings, const
char *delim, long num_args, ...);

/*
 * char *substr(const char *str, long start_index, long end_index):
 *
 * Function substr() allocates memory and returns a pointer to a string /
 * character array which is a substring of 'str' starting from index
 * 'start_index' till 'end_index' (inclusive). This substring is terminated
 * by null byte at the end.
 *
 * If 'str' is NULL or 'str' is empty or 'start_index' is less than 0 or
 * 'end_index' is less than 0 or 'end_index' is less than 'start_index' or
 * 'start_index' is greater than length of 'str' - 1 or 'end_index' is
 * greater than length of 'str' - 1 then NULL is returned. NULL is also
 * returned if memory allocation fails.
 *
 * The returned pointer points to a memory region containing the substring
 * and this memory region was allocated using malloc. So, it is the user's
 * responsibility to free the allocated memory.
 */
char *substr(const char *str, long start_index, long end_index);

#endif

Regards,
Amit