|
| 1 | +/* ****************************************************************** |
| 2 | + * hist : Histogram functions |
| 3 | + * part of Finite State Entropy project |
| 4 | + * Copyright (c) Meta Platforms, Inc. and affiliates. |
| 5 | + * |
| 6 | + * You can contact the author at : |
| 7 | + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy |
| 8 | + * - Public forum : https://groups.google.com/forum/#!forum/lz4c |
| 9 | + * |
| 10 | + * This source code is licensed under both the BSD-style license (found in the |
| 11 | + * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
| 12 | + * in the COPYING file in the root directory of this source tree). |
| 13 | + * You may select, at your option, one of the above-listed licenses. |
| 14 | +****************************************************************** */ |
| 15 | + |
| 16 | +/* --- dependencies --- */ |
| 17 | +#include <stddef.h> /* size_t */ |
| 18 | + |
| 19 | + |
| 20 | +/* --- simple histogram functions --- */ |
| 21 | + |
| 22 | +/*! HIST_count(): |
| 23 | + * Provides the precise count of each byte within a table 'count'. |
| 24 | + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). |
| 25 | + * Updates *maxSymbolValuePtr with actual largest symbol value detected. |
| 26 | + * @return : count of the most frequent symbol (which isn't identified). |
| 27 | + * or an error code, which can be tested using HIST_isError(). |
| 28 | + * note : if return == srcSize, there is only one symbol. |
| 29 | + */ |
| 30 | +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, |
| 31 | + const void* src, size_t srcSize); |
| 32 | + |
| 33 | +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ |
| 34 | + |
| 35 | + |
| 36 | +/* --- advanced histogram functions --- */ |
| 37 | + |
| 38 | +#define HIST_WKSP_SIZE_U32 1024 |
| 39 | +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) |
| 40 | +/** HIST_count_wksp() : |
| 41 | + * Same as HIST_count(), but using an externally provided scratch buffer. |
| 42 | + * Benefit is this function will use very little stack space. |
| 43 | + * `workSpace` is a writable buffer which must be 4-bytes aligned, |
| 44 | + * `workSpaceSize` must be >= HIST_WKSP_SIZE |
| 45 | + */ |
| 46 | +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
| 47 | + const void* src, size_t srcSize, |
| 48 | + void* workSpace, size_t workSpaceSize); |
| 49 | + |
| 50 | +/** HIST_countFast() : |
| 51 | + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. |
| 52 | + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` |
| 53 | + */ |
| 54 | +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, |
| 55 | + const void* src, size_t srcSize); |
| 56 | + |
| 57 | +/** HIST_countFast_wksp() : |
| 58 | + * Same as HIST_countFast(), but using an externally provided scratch buffer. |
| 59 | + * `workSpace` is a writable buffer which must be 4-bytes aligned, |
| 60 | + * `workSpaceSize` must be >= HIST_WKSP_SIZE |
| 61 | + */ |
| 62 | +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
| 63 | + const void* src, size_t srcSize, |
| 64 | + void* workSpace, size_t workSpaceSize); |
| 65 | + |
| 66 | +/*! HIST_count_simple() : |
| 67 | + * Same as HIST_countFast(), this function is unsafe, |
| 68 | + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. |
| 69 | + * It is also a bit slower for large inputs. |
| 70 | + * However, it does not need any additional memory (not even on stack). |
| 71 | + * @return : count of the most frequent symbol. |
| 72 | + * Note this function doesn't produce any error (i.e. it must succeed). |
| 73 | + */ |
| 74 | +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, |
| 75 | + const void* src, size_t srcSize); |
| 76 | + |
| 77 | +/*! HIST_add() : |
| 78 | + * Lowest level: just add nb of occurrences of characters from @src into @count. |
| 79 | + * @count is not reset. @count array is presumed large enough (i.e. 1 KB). |
| 80 | + @ This function does not need any additional stack memory. |
| 81 | + */ |
| 82 | +void HIST_add(unsigned* count, const void* src, size_t srcSize); |
0 commit comments