diff --git a/experimental/builder/test/testing_utils.cpp b/experimental/builder/test/testing_utils.cpp index f44eef12e6..e8412bbdf9 100644 --- a/experimental/builder/test/testing_utils.cpp +++ b/experimental/builder/test/testing_utils.cpp @@ -16,104 +16,160 @@ bool isTerminalOutput() return isatty(fileno(stdout)); // or stderr } -// These are reversed because the code below is weird. -const char* EXPECTED_COLOR = isTerminalOutput() ? "m63[\033" : ""; // Cyan -const char* ACTUAL_COLOR = isTerminalOutput() ? "m53[\033" : ""; // Magenta -const char* RESET = isTerminalOutput() ? "m0[\033" : ""; +const char* EXPECTED_COLOR = isTerminalOutput() ? "\033[36m" : ""; // Cyan +const char* ACTUAL_COLOR = isTerminalOutput() ? "\033[35m" : ""; // Magenta +const char* RESET = isTerminalOutput() ? "\033[0m" : ""; } // namespace -// Copilot-generated diff function, seems to work great. -// TODO: Study this, maybe add unit tests? +// Wagner-Fischer Algorithm for Computing Edit Distance and Inline Diff // -// This implementation is based on the Wagner-Fischer algorithm to find the -// optimal edit script. +// OUTPUT FORMAT: [expected|actual] for differences, plain text for matches +// Example: "hello world" vs "hello earth" → "hello [world|earth]" +// +// This function implements the Wagner-Fischer algorithm (1974), which is the classic +// dynamic programming solution for computing the minimum edit distance (Levenshtein distance) +// between two strings. The algorithm has O(n*m) time and space complexity. +// +// ALGORITHM OVERVIEW: +// 1. Build a 2D DP table where dp[i][j] represents the minimum edit distance +// between the first i characters of 'expected' and first j characters of 'actual' +// 2. Fill the table using the recurrence relation: +// dp[i][j] = min( +// dp[i-1][j] + 1, // deletion (remove char from expected) +// dp[i][j-1] + 1, // insertion (add char to expected) +// dp[i-1][j-1] + cost // substitution (cost=0 if chars match, 1 if different) +// ) +// 3. Backtrack through the table to reconstruct the optimal edit sequence +// +// REFERENCES: +// - Wagner, R. A.; Fischer, M. J. (1974). "The String-to-String Correction Problem" +// - Also known as: Levenshtein distance, edit distance, string alignment +// - Similar to sequence alignment algorithms used in bioinformatics (Needleman-Wunsch) std::string inlineDiff(const std::string& actual, const std::string& expected) { - const size_t n = expected.length(); - const size_t m = actual.length(); + const size_t n = expected.length(); // Length of expected string + const size_t m = actual.length(); // Length of actual string + // PHASE 1: Build the Dynamic Programming Table + // dp[i][j] = minimum edit distance between expected[0..i-1] and actual[0..j-1] std::vector> dp(n + 1, std::vector(m + 1)); + // Base cases: transforming empty string to/from prefixes for(size_t i = 0; i <= n; ++i) { - dp[i][0] = i; + dp[i][0] = i; // Delete i characters from expected to get empty string } for(size_t j = 0; j <= m; ++j) { - dp[0][j] = j; + dp[0][j] = j; // Insert j characters to empty string to get actual[0..j-1] } + // Fill the DP table using the Wagner-Fischer recurrence relation for(size_t i = 1; i <= n; ++i) { for(size_t j = 1; j <= m; ++j) { + // Cost is 0 if characters match, 1 if they need substitution int cost = (expected[i - 1] == actual[j - 1]) ? 0 : 1; - dp[i][j] = std::min({dp[i - 1][j] + 1, // Deletion - dp[i][j - 1] + 1, // Insertion - dp[i - 1][j - 1] + cost}); // Substitution/Match + + // Choose the minimum cost operation: + dp[i][j] = std::min({ + dp[i - 1][j] + 1, // Deletion: remove expected[i-1] + dp[i][j - 1] + 1, // Insertion: add actual[j-1] + dp[i - 1][j - 1] + cost // Substitution/Match + }); } } - std::ostringstream diff; - std::string expected_diff, actual_diff; - bool in_diff = false; - - size_t i = n, j = m; + // PHASE 2: Backtrack to Reconstruct the Optimal Edit Sequence + // We trace back from dp[n][m] to dp[0][0] to find which operations were used + std::vector operations; // 'M'atch, 'S'ubstitution, 'I'nsertion, 'D'eletion + std::vector> diff_chars; // Character pairs for each operation + + size_t i = n, j = m; // Start from bottom-right corner of DP table while(i > 0 || j > 0) { + // Determine which operation led to the current cell's value int cost = (i > 0 && j > 0 && expected[i - 1] == actual[j - 1]) ? 0 : 1; + // Check if we came from diagonal (substitution/match) if(i > 0 && j > 0 && dp[i][j] == dp[i - 1][j - 1] + cost) { if(cost == 0) { - if(in_diff) - { - std::reverse(expected_diff.begin(), expected_diff.end()); - std::reverse(actual_diff.begin(), actual_diff.end()); - diff << "]" << RESET << expected_diff << EXPECTED_COLOR << "|" << RESET - << actual_diff << ACTUAL_COLOR << "["; - expected_diff.clear(); - actual_diff.clear(); - in_diff = false; - } - diff << expected[i - 1]; + operations.push_back('M'); // Characters match + diff_chars.push_back({expected[i - 1], actual[j - 1]}); } else { - in_diff = true; - expected_diff += expected[i - 1]; - actual_diff += actual[j - 1]; + operations.push_back('S'); // Substitution needed + diff_chars.push_back({expected[i - 1], actual[j - 1]}); } - --i; - --j; + --i; --j; // Move diagonally up-left } + // Check if we came from left (insertion) else if(j > 0 && dp[i][j] == dp[i][j - 1] + 1) { - in_diff = true; - actual_diff += actual[j - 1]; - --j; + operations.push_back('I'); // Insertion: actual has extra character + diff_chars.push_back({'\0', actual[j - 1]}); + --j; // Move left } + // Must have come from above (deletion) else if(i > 0 && dp[i][j] == dp[i - 1][j] + 1) { - in_diff = true; - expected_diff += expected[i - 1]; - --i; + operations.push_back('D'); // Deletion: expected has extra character + diff_chars.push_back({expected[i - 1], '\0'}); + --i; // Move up } } - if(in_diff) + // PHASE 3: Reverse and Build the Human-Readable Diff String + // Backtracking gives us operations in reverse order, so we reverse to get forward order + std::reverse(operations.begin(), operations.end()); + std::reverse(diff_chars.begin(), diff_chars.end()); + + // Build the final diff string with color highlighting + std::ostringstream diff; + std::string expected_diff, actual_diff; // Accumulate consecutive differences + bool in_diff = false; // Track whether we're inside a diff section + + for(size_t k = 0; k < operations.size(); ++k) { - std::reverse(expected_diff.begin(), expected_diff.end()); - std::reverse(actual_diff.begin(), actual_diff.end()); - diff << "]" << RESET << expected_diff << EXPECTED_COLOR << "|" << RESET << actual_diff - << ACTUAL_COLOR << "["; + char op = operations[k]; + char exp_char = diff_chars[k].first; // Expected character ('\0' for insertions) + char act_char = diff_chars[k].second; // Actual character ('\0' for deletions) + + if(op == 'M') // Match - characters are identical + { + if(in_diff) + { + // Close the current diff section and output it + diff << "[" << EXPECTED_COLOR << expected_diff << RESET << "|" + << ACTUAL_COLOR << actual_diff << RESET << "]"; + expected_diff.clear(); + actual_diff.clear(); + in_diff = false; + } + diff << exp_char; // Output the matching character as-is + } + else // Difference (substitution, insertion, or deletion) + { + in_diff = true; + // Accumulate characters for the diff section + if(exp_char != '\0') expected_diff += exp_char; // Add to expected side + if(act_char != '\0') actual_diff += act_char; // Add to actual side + } } - std::string result = diff.str(); - std::reverse(result.begin(), result.end()); - return result; + // Close any remaining diff section at the end + if(in_diff) + { + diff << "[" << EXPECTED_COLOR << expected_diff << RESET << "|" + << ACTUAL_COLOR << actual_diff << RESET << "]"; + } + + return diff.str(); } std::string formatInlineDiff(const std::string& actual, const std::string& expected)