Fix CSV parsing for unclosed quotes

Fixes #245
This commit is contained in:
DominikDoom
2023-10-06 14:44:03 +02:00
parent 446ac14e7f
commit 5fd48f53de
2 changed files with 8 additions and 8 deletions

View File

@@ -2,12 +2,12 @@
// Parse the CSV file into a 2D array. Doesn't use regex, so it is very lightweight.
function parseCSV(str) {
var arr = [];
var quote = false; // 'true' means we're inside a quoted field
const arr = [];
let quote = false; // 'true' means we're inside a quoted field
// Iterate over each character, keep track of current row and column (of the returned array)
for (var row = 0, col = 0, c = 0; c < str.length; c++) {
var cc = str[c], nc = str[c + 1]; // Current character, next character
for (let row = 0, col = 0, c = 0; c < str.length; c++) {
let cc = str[c], nc = str[c+1]; // Current character, next character
arr[row] = arr[row] || []; // Create a new row if necessary
arr[row][col] = arr[row][col] || ''; // Create a new column (start with empty string) if necessary
@@ -24,12 +24,12 @@ function parseCSV(str) {
// If it's a newline (CRLF) and we're not in a quoted field, skip the next character
// and move on to the next row and move to column 0 of that new row
if (cc == '\r' && nc == '\n' && !quote) { ++row; col = 0; ++c; continue; }
if (cc == '\r' && nc == '\n') { ++row; col = 0; ++c; quote = false; continue; }
// If it's a newline (LF or CR) and we're not in a quoted field,
// move on to the next row and move to column 0 of that new row
if (cc == '\n' && !quote) { ++row; col = 0; continue; }
if (cc == '\r' && !quote) { ++row; col = 0; continue; }
if (cc == '\n') { ++row; col = 0; quote = false; continue; }
if (cc == '\r') { ++row; col = 0; quote = false; continue; }
// Otherwise, append the current character to the current column
arr[row][col] += cc;

View File

@@ -20,7 +20,7 @@ async function load() {
// Add to the dict
csv_lines.forEach(parts => {
const hash = parts[0];
const keywords = parts[1].replaceAll("| ", ", ").replaceAll("|", ", ").trim();
const keywords = parts[1]?.replaceAll("| ", ", ")?.replaceAll("|", ", ")?.trim();
const lastSepIndex = parts[2]?.lastIndexOf("/") + 1 || parts[2]?.lastIndexOf("\\") + 1 || 0;
const name = parts[2]?.substring(lastSepIndex).trim() || "none"