-
Notifications
You must be signed in to change notification settings - Fork 4.2k
examples : add wer cli example #2990
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
danbev
wants to merge
3
commits into
ggml-org:master
Choose a base branch
from
danbev:wer-example
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+375
−0
Draft
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
#include "wer.h" | ||
|
||
#include <cstdio> | ||
#include <iostream> | ||
#include <vector> | ||
#include <string> | ||
#include <algorithm> | ||
#include <sstream> | ||
#include <tuple> | ||
|
||
// Breaks `text` into whitespace-delimited tokens and returns them in the
// order they appear. Runs of whitespace (including leading and trailing
// whitespace) never yield empty tokens, so an empty or all-blank input
// produces an empty vector.
std::vector<std::string> split_into_words(const std::string& text) {
    std::stringstream stream(text);
    std::vector<std::string> tokens;

    // operator>> skips leading whitespace and stops at the next whitespace.
    for (std::string token; stream >> token; ) {
        tokens.push_back(token);
    }

    return tokens;
}
|
||
// Aligns `reference` against `actual` word by word using the Levenshtein
// dynamic-programming table and counts the edit operations on one minimal
// alignment.
//
// Returns a tuple of {substitutions, deletions, insertions}:
//   - substitution: a reference word was replaced by a different actual word
//   - deletion:     a reference word has no counterpart in `actual`
//   - insertion:    an actual word has no counterpart in `reference`
//
// When several minimal alignments exist, the backtracking step prefers a
// match, then a substitution, then a deletion, and finally an insertion.
std::tuple<int, int, int> count_edit_ops(const std::vector<std::string>& reference,
                                         const std::vector<std::string>& actual) {
    const int n_ref = reference.size();
    const int n_act = actual.size();

    // dist[r][c] is the edit distance between the first r reference words
    // and the first c actual words.
    std::vector<std::vector<int>> dist(n_ref + 1, std::vector<int>(n_act + 1, 0));

    // Against an empty sequence the distance is simply the prefix length.
    for (int r = 0; r <= n_ref; ++r) {
        dist[r][0] = r;
    }
    for (int c = 0; c <= n_act; ++c) {
        dist[0][c] = c;
    }

    for (int r = 1; r <= n_ref; ++r) {
        for (int c = 1; c <= n_act; ++c) {
            if (reference[r - 1] == actual[c - 1]) {
                // Equal words cost nothing: carry the diagonal value over.
                dist[r][c] = dist[r - 1][c - 1];
                continue;
            }
            const int via_deletion     = dist[r - 1][c];     // cell above
            const int via_insertion    = dist[r][c - 1];     // cell to the left
            const int via_substitution = dist[r - 1][c - 1]; // diagonal cell
            dist[r][c] = 1 + std::min(via_deletion, std::min(via_insertion, via_substitution));
        }
    }

    int n_sub = 0;
    int n_del = 0;
    int n_ins = 0;

    // Walk back from the bottom-right corner to the origin, classifying each
    // step by the neighbouring cell it came from.
    int r = n_ref;
    int c = n_act;
    while (r > 0 || c > 0) {
        const bool can_go_diagonal = r > 0 && c > 0;

        if (can_go_diagonal && reference[r - 1] == actual[c - 1]) {
            // Words match: no edit, step diagonally.
            --r;
            --c;
            continue;
        }
        if (can_go_diagonal && dist[r][c] == dist[r - 1][c - 1] + 1) {
            // One more than the diagonal cell: substitution.
            ++n_sub;
            --r;
            --c;
            continue;
        }
        if (r > 0 && dist[r][c] == dist[r - 1][c] + 1) {
            // One more than the cell above: deletion.
            ++n_del;
            --r;
            continue;
        }
        // Neither the diagonal nor the cell above applies, so the step came
        // from the left: insertion.
        ++n_ins;
        --c;
    }

    return std::make_tuple(n_sub, n_del, n_ins);
}
|
||
// Computes word error rate statistics for one transcript pair.
//
// Both texts are tokenized on whitespace, the word-level edit operations are
// counted, and the WER is the total number of edits divided by the number of
// reference words. If the reference contains no words the WER is reported as
// 0.0 (no division is performed), regardless of what `actual_text` contains.
wer_result calculate_wer(const std::string& reference_text, const std::string& actual_text) {
    const std::vector<std::string> ref_words = split_into_words(reference_text);
    const std::vector<std::string> act_words = split_into_words(actual_text);

    const auto [n_sub, n_del, n_ins] = count_edit_ops(ref_words, act_words);
    const int n_edits = n_sub + n_del + n_ins;

    const double wer = ref_words.empty()
        ? 0.0
        : static_cast<double>(n_edits) / ref_words.size();

    wer_result result{};
    result.n_ref_words = ref_words.size();
    result.n_act_words = act_words.size();
    result.n_sub       = n_sub;
    result.n_del       = n_del;
    result.n_ins       = n_ins;
    result.n_edits     = n_edits;
    result.wer         = wer;
    return result;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#ifndef WER_H | ||
#define WER_H | ||
#include <vector> | ||
#include <string> | ||
|
||
// Statistics produced by comparing one reference transcript with one actual
// (transcribed) text, as returned by calculate_wer().
struct wer_result {
    // Word count of the reference text.
    size_t n_ref_words;
    // Word count of the actual (transcribed) text.
    size_t n_act_words;
    // Count of substituted words.
    int n_sub;
    // Count of deleted words.
    int n_del;
    // Count of inserted words.
    int n_ins;
    // Sum of substitutions, deletions and insertions.
    int n_edits;
    // Word error rate.
    double wer;
};
|
||
wer_result calculate_wer(const std::string& reference_text, const std::string& actual_text); | ||
|
||
#endif // WER_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
set(TARGET whisper-wer) | ||
add_executable(${TARGET} cli.cpp) | ||
|
||
include(DefaultTargetOptions) | ||
|
||
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT}) | ||
|
||
install(TARGETS ${TARGET} RUNTIME) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# whisper.cpp/examples/wer | ||
|
||
This is a command line tool for calculating the Word Error Rate (WER). This tool | ||
expects that reference transcriptions (the known correct transcriptions) | ||
and actual transcriptions from whisper.cpp are available in two separate | ||
directories where the file names are identical. | ||
|
||
### Usage | ||
```console | ||
$ ./build/bin/whisper-wer | ||
Usage: ./build/bin/whisper-wer [options] | ||
Options: | ||
-r, --reference PATH Full path to reference transcriptions directory | ||
-a, --actual PATH Full path to actual transcriptions directory | ||
--help Display this help message | ||
``` | ||
|
||
### Example Usage with whisper-cli | ||
First, generate transcription(s) using whisper-cli: | ||
``` | ||
./build/bin/whisper-cli \ | ||
-m models/ggml-base.en.bin \ | ||
-f samples/jfk.wav \ | ||
--output-txt | ||
... | ||
output_txt: saving output to 'samples/jfk.wav.txt' | ||
``` | ||
Next, copy the transcription to a directory where the actual transcriptions | ||
are stored. In this example we will use a directory called `actual_transcriptions` | ||
in this example's directory: | ||
```console | ||
$ cp samples/jfk.wav.txt examples/wer/actual_transcriptions | ||
``` | ||
In a real world scenario the reference transcriptions would be available | ||
representing the known correct text. In this case we have already placed a file | ||
in `examples/wer/reference_transcriptions` that can be used for testing, where | ||
only a single word was changed (`Americans` -> `Swedes`). | ||
|
||
Finally, run the whisper-wer tool: | ||
```console | ||
$ ./build/bin/whisper-wer -r examples/wer/reference_transcriptions/ -a examples/wer/actual_transcriptions/ | ||
Word Error Rate for : jfk.wav.txt | ||
Reference words: 22 | ||
Actual words: 22 | ||
Substitutions: 1 | ||
Deletions: 0 | ||
Insertions: 0 | ||
Total edits: 1 | ||
WER: 0.045455 | ||
``` | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
#include "wer.h" | ||
|
||
#include <cstdio> | ||
#include <vector> | ||
#include <string> | ||
#include <filesystem> | ||
#include <fstream> | ||
#include <cstring> | ||
#include <map> | ||
|
||
// Lists the regular files with a ".txt" extension directly inside `dir_path`
// (no recursion) and returns their paths as strings.
//
// A filesystem error while iterating is reported on stdout and swallowed; the
// paths collected up to that point (possibly none) are returned.
std::vector<std::string> read_files_from_directory(const std::string& dir_path) {
    namespace fs = std::filesystem;

    std::vector<std::string> txt_files;
    try {
        for (const fs::directory_entry& item : fs::directory_iterator(dir_path)) {
            if (!item.is_regular_file()) {
                continue;
            }
            if (item.path().extension() != ".txt") {
                continue;
            }
            txt_files.push_back(item.path().string());
        }
    } catch (const fs::filesystem_error& err) {
        printf("Error reading directory %s: %s\n", dir_path.c_str(), err.what());
    }
    return txt_files;
}
|
||
// Reads the text file at `file_path` line by line and returns its content.
//
// Every line read is followed by a "\n" in the result, so a file whose last
// line lacks a trailing newline gains one. If the file cannot be opened, a
// message is printed to stdout and an empty string is returned.
std::string read_file_content(const std::string& file_path) {
    std::ifstream input(file_path);
    if (!input.is_open()) {
        printf("Unable to open file: %s\n", file_path.c_str());
        return std::string();
    }

    std::string text;
    for (std::string row; std::getline(input, row); ) {
        text += row + "\n";
    }
    return text;
}
|
||
// Returns the final component of `path` (file name including any extensions),
// e.g. "dir/jfk.wav.txt" -> "jfk.wav.txt".
std::string get_base_filename(const std::string& path) {
    const std::filesystem::path as_path(path);
    return as_path.filename().string();
}
|
||
// Prints the command line help text to stdout. `program_name` is inserted
// into the "Usage:" line (callers pass argv[0]).
void print_usage(const char* program_name) {
    static const char* const option_lines[] = {
        "Options:\n",
        " -r, --reference PATH Full path to reference transcriptions directory\n",
        " -a, --actual PATH Full path to actual transcriptions directory\n",
        " --help Display this help message\n",
    };

    printf("Usage: %s [options]\n", program_name);
    for (const char* line : option_lines) {
        fputs(line, stdout);
    }
}
|
||
int main(int argc, char** argv) { | ||
if (argc == 1) { | ||
print_usage(argv[0]); | ||
return 0; | ||
} | ||
|
||
std::string reference_path; | ||
std::string actual_path; | ||
bool reference_set = false; | ||
bool actual_set = false; | ||
|
||
for (int i = 1; i < argc; i++) { | ||
if (strcmp(argv[i], "--help") == 0) { | ||
print_usage(argv[0]); | ||
return 0; | ||
} else if (strcmp(argv[i], "-r") == 0 || strcmp(argv[i], "--reference") == 0) { | ||
if (i + 1 < argc) { | ||
reference_path = argv[++i]; | ||
reference_set = true; | ||
} else { | ||
printf("Error: Missing path after %s\n", argv[i]); | ||
print_usage(argv[0]); | ||
return 1; | ||
} | ||
} else if (strcmp(argv[i], "-a") == 0 || strcmp(argv[i], "--actual") == 0) { | ||
if (i + 1 < argc) { | ||
actual_path = argv[++i]; | ||
actual_set = true; | ||
} else { | ||
printf("Error: Missing path after %s\n", argv[i]); | ||
print_usage(argv[0]); | ||
return 1; | ||
} | ||
} else { | ||
printf("Error: Unknown option: %s\n", argv[i]); | ||
print_usage(argv[0]); | ||
return 1; | ||
} | ||
} | ||
|
||
if (!reference_set || !actual_set) { | ||
printf("Error: Both reference and actual paths must be provided\n"); | ||
print_usage(argv[0]); | ||
return 1; | ||
} | ||
|
||
if (!std::filesystem::exists(reference_path) || !std::filesystem::is_directory(reference_path)) { | ||
printf("Error: Reference path '%s' does not exist or is not a directory\n", reference_path.c_str()); | ||
return 1; | ||
} | ||
|
||
if (!std::filesystem::exists(actual_path) || !std::filesystem::is_directory(actual_path)) { | ||
printf("Error: Actual path '%s' does not exist or is not a directory\n", actual_path.c_str()); | ||
return 1; | ||
} | ||
|
||
std::vector<std::string> reference_files = read_files_from_directory(reference_path); | ||
std::vector<std::string> actual_files = read_files_from_directory(actual_path); | ||
|
||
//printf("Found %zu reference files in %s\n", reference_files.size(), reference_path.c_str()); | ||
//printf("Found %zu actual files in %s\n", actual_files.size(), actual_path.c_str()); | ||
|
||
std::map<std::string, std::string> reference_map; | ||
std::map<std::string, std::string> actual_map; | ||
|
||
for (const auto& file : reference_files) { | ||
reference_map[get_base_filename(file)] = file; | ||
} | ||
|
||
for (const auto& file : actual_files) { | ||
actual_map[get_base_filename(file)] = file; | ||
} | ||
|
||
for (const auto& [filename, ref_path] : reference_map) { | ||
auto actual_it = actual_map.find(filename); | ||
if (actual_it != actual_map.end()) { | ||
std::string reference_content = read_file_content(ref_path); | ||
std::string actual_content = read_file_content(actual_it->second); | ||
|
||
wer_result result = calculate_wer(reference_content, actual_content); | ||
printf("Word Error Rate for : %s\n", filename.c_str()); | ||
printf(" Reference words: %ld\n", result.n_ref_words); | ||
printf(" Actual words: %ld\n", result.n_act_words); | ||
printf(" Substitutions: %d\n", result.n_sub); | ||
printf(" Deletions: %d\n", result.n_del); | ||
printf(" Insertions: %d\n", result.n_ins); | ||
printf(" Total edits: %d\n", result.n_edits); | ||
printf(" WER: %f\n", result.wer); | ||
} else { | ||
printf("Warning: No matching actual file found for reference file: %s\n", filename.c_str()); | ||
} | ||
} | ||
|
||
return 0; | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
And so my fellow Swedes, ask not what your country can do for you, ask what you can do for your country. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lol 😄