mirror of
https://codeberg.org/PostERG/xamxam.git
synced 2026-06-25 16:19:19 +02:00
Fix: CSV importer and imported data
- pad rows, distinguish empty year, better error diagnostics
- derive year from identifier when year column is empty
- fix remaining 18 theses: Installation/Performance (slash→dash) orientation alias
- csv importer: use column-name-based header detection instead of hardcoded positions
This commit is contained in:
@@ -33,10 +33,87 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_FILES['csv_file'])) {
|
||||
$handle = fopen($_FILES['csv_file']['tmp_name'], 'r');
|
||||
if (!$handle) throw new Exception("Impossible d'ouvrir le fichier CSV.");
|
||||
|
||||
fgetcsv($handle, 0, ',', '"', '');
|
||||
fgetcsv($handle, 0, ',', '"', '');
|
||||
fgetcsv($handle, 0, ',', '"', '');
|
||||
fgetcsv($handle, 0, ',', '"', ''); // skip 4 header rows
|
||||
// Locate the header row by column name rather than by fixed position.
// Up to 8 rows are read; each cell is trimmed and lowercased, then compared
// with the known column names. A row is accepted as the header once at least
// 8 names match, and $colIdx then maps name → column position. $colIdx stays
// null when no row qualifies, so later code can fall back to positional columns.
//
// Matching rules, tried per cell in this order:
//   - exact match;
//   - the "licence" / "license" spellings match each other;
//   - prefix match for names of 5+ characters, so "contact.visible" matches
//     "contact" and "promoteur·ice(s)" matches "promoteur". Shorter names
//     (e.g. "ap") must match exactly so they cannot catch unrelated words.
$colIdx = null;
$headerRowNum = 0; // number of rows read from the file so far
$knownHeaders = [
    'identifiant', 'titre', 'sous-titre', 'auteur', 'contact',
    'promoteur', 'format', 'année', 'ap', 'orientation', 'finalité',
    'mots-clés', 'synopsis', 'contexte', 'remarques', 'langue',
    'autorisation', 'licence', 'license', 'taille', 'points', 'lien baiu',
];

// Multibyte-aware normalisation so accented headers typed in capitals
// ("ANNÉE", "FINALITÉ") still match; uses strtolower() when the mbstring
// extension is not loaded.
$normalizeHeader = static fn($s): string => function_exists('mb_strtolower')
    ? mb_strtolower(trim((string)$s), 'UTF-8')
    : strtolower(trim((string)$s));

for ($scan = 0; $scan < 8; $scan++) {
    // The enclosure must be exactly one character. Written as '\"' inside
    // single quotes it would be two (backslash + quote), which fgetcsv()
    // does not accept as an enclosure.
    $hrow = fgetcsv($handle, 0, ',', '"', '');
    if ($hrow === false) break;
    $headerRowNum++;

    $normRow = array_map($normalizeHeader, $hrow);
    $hits = 0;
    $map  = [];
    $used = []; // positions already claimed, so one cell maps to one name only

    foreach ($knownHeaders as $h) {
        $allowPrefix = strlen($h) >= 5;
        foreach ($normRow as $pos => $cell) {
            if (isset($used[$pos])) continue;

            $matches = $cell === $h
                || ($h === 'licence' && $cell === 'license')
                || ($h === 'license' && $cell === 'licence')
                || ($allowPrefix && str_starts_with($cell, $h));

            if ($matches) {
                $hits++;
                $map[$h] = $pos;
                $used[$pos] = true;
                break; // first matching cell wins for this name
            }
        }
    }

    // Require at least 8 known headers to trust the row.
    if ($hits >= 8) { $colIdx = $map; break; }
}
|
||||
// Position the file handle on the first data row.
//
// - No header row recognised ($colIdx === null): rewind and skip the first
//   4 rows, i.e. the legacy fixed layout; $headerRowNum is set to 4.
// - Header row recognised: keep reading past blank / instruction / template
//   rows. The first row whose identifiant cell is non-empty, does not start
//   with "column" (template placeholder such as "Column1") and does not
//   contain "éparer" (instruction snippet) is real data. It has already been
//   read from the handle, so it is kept in $peekRow for the import loop.
//
// $peekRow is always defined after this block (null when there is nothing
// to replay), and $headerRowNum counts every row consumed so far.
$peekRow = null;
if ($colIdx === null) {
    rewind($handle);
    $headerRowNum = 4;
    // Same explicit CSV dialect as every other read of this file.
    for ($i = 0; $i < 4; $i++) fgetcsv($handle, 0, ',', '"', '');
} else {
    $idPos = $colIdx['identifiant'] ?? 0;
    // The enclosure must be a single character: '"', not the two-character '\"'.
    while (($peek = fgetcsv($handle, 0, ',', '"', '')) !== false) {
        $headerRowNum++;
        $val = trim((string)($peek[$idPos] ?? ''));
        $valLower = strtolower($val);
        if ($val === '' || str_starts_with($valLower, 'column')
            || str_contains($valLower, 'éparer')) {
            continue; // metadata row, skip
        }
        $peekRow = $peek;
        break;
    }
}
|
||||
|
||||
// Helper: read one trimmed cell from a CSV row, addressed by column name.
//   $row         parsed CSV row
//   $name        normalised column name (e.g. 'titre')
//   $fallbackPos column index used only when no header row was detected
// With a detected header ($colIdx !== null) only the mapped column is read;
// a name missing from the header yields ''. 'license' and 'licence' are
// looked up interchangeably. A position absent from the row also yields ''.
$cell = function(array $row, string $name, int $fallbackPos) use ($colIdx): string {
    if ($colIdx === null) {
        // No header row: positional layout.
        $index = $fallbackPos;
    } else {
        $alternateSpelling = ['license' => 'licence', 'licence' => 'license'];
        $index = $colIdx[$name] ?? null;
        if ($index === null && isset($alternateSpelling[$name])) {
            $index = $colIdx[$alternateSpelling[$name]] ?? null;
        }
        if ($index === null) {
            return '';
        }
    }

    if (!isset($row[$index])) {
        return '';
    }
    return trim((string)$row[$index]);
};
|
||||
|
||||
// Code → canonical name (legacy short-code CSV format)
|
||||
$orientationCodeMap = [
|
||||
@@ -132,39 +209,48 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_FILES['csv_file'])) {
|
||||
return $r ? (int)$r['id'] : null;
|
||||
};
|
||||
|
||||
$lineNumber = 5;
|
||||
while (($row = fgetcsv($handle, 0, ',', '"', '')) !== false) {
|
||||
$lineNumber = $headerRowNum;
|
||||
$usePeek = isset($peekRow) && $peekRow !== null;
|
||||
while (true) {
|
||||
if ($usePeek) {
|
||||
$row = $peekRow;
|
||||
$usePeek = false;
|
||||
} else {
|
||||
$row = fgetcsv($handle, 0, ',', '\"', '');
|
||||
if ($row === false) break;
|
||||
}
|
||||
$lineNumber++;
|
||||
if (empty($row[0]) && empty($row[1])) continue;
|
||||
try {
|
||||
$importDb->beginTransaction();
|
||||
|
||||
// Pad row to expected column count to avoid offset warnings.
|
||||
$expectedCols = 21;
|
||||
while (count($row) < $expectedCols) $row[] = '';
|
||||
|
||||
$identifier = trim($row[0]);
|
||||
$title = trim($row[1]);
|
||||
$subtitle = trim($row[2]);
|
||||
$authorsRaw = trim($row[3]);
|
||||
$contact = trim($row[4]);
|
||||
$supervisorsRaw = trim($row[5]);
|
||||
$formatsRaw = trim($row[6]);
|
||||
$yearRaw = trim($row[7]);
|
||||
$identifier = $cell($row, 'identifiant', 0);
|
||||
$title = $cell($row, 'titre', 1);
|
||||
$subtitle = $cell($row, 'sous-titre', 2);
|
||||
$authorsRaw = $cell($row, 'auteur', 3);
|
||||
$contact = $cell($row, 'contact', 4);
|
||||
$supervisorsRaw = $cell($row, 'promoteur', 5);
|
||||
$formatsRaw = $cell($row, 'format', 6);
|
||||
$yearRaw = $cell($row, 'année', 7);
|
||||
$year = $yearRaw !== '' ? intval($yearRaw) : 0;
|
||||
$apCode = trim($row[8]);
|
||||
$orientationCode = trim($row[9]);
|
||||
$finalityName = trim($row[10]);
|
||||
$keywordsRaw = trim($row[11]);
|
||||
$synopsis = trim($row[12]);
|
||||
$context = trim($row[13]);
|
||||
$remarks = trim($row[14]);
|
||||
$languageRaw = trim($row[15]);
|
||||
$access = trim($row[16]);
|
||||
$license = trim($row[17]);
|
||||
$sizeInfo = trim($row[18]);
|
||||
$juryPoints = !empty($row[19]) ? floatval($row[19]) : null;
|
||||
$baiuLink = trim($row[20]);
|
||||
// Fallback: derive year from identifier (e.g. "2024-003" → 2024)
|
||||
if ($year === 0 && $identifier !== '' && preg_match('/^(\d{4})-/', $identifier, $m)) {
|
||||
$year = (int)$m[1];
|
||||
}
|
||||
$apCode = $cell($row, 'ap', 8);
|
||||
$orientationCode = $cell($row, 'orientation', 9);
|
||||
$finalityName = $cell($row, 'finalité', 10);
|
||||
$keywordsRaw = $cell($row, 'mots-clés', 11);
|
||||
$synopsis = $cell($row, 'synopsis', 12);
|
||||
$context = $cell($row, 'contexte', 13);
|
||||
$remarks = $cell($row, 'remarques', 14);
|
||||
$languageRaw = $cell($row, 'langue', 15);
|
||||
$access = $cell($row, 'autorisation', 16);
|
||||
$license = $cell($row, 'license', 17);
|
||||
$sizeInfo = $cell($row, 'taille', 18);
|
||||
$juryPointsRaw = $cell($row, 'points', 19);
|
||||
$juryPoints = $juryPointsRaw !== '' ? floatval($juryPointsRaw) : null;
|
||||
$baiuLink = $cell($row, 'lien baiu', 20);
|
||||
|
||||
if ($title === '' || $year === 0) {
|
||||
$missing = [];
|
||||
|
||||
Reference in New Issue
Block a user