mirror of
https://codeberg.org/PostERG/xamxam.git
synced 2026-06-25 16:19:19 +02:00
Fix: CSV importer and imported data
- pad rows, distinguish empty year, better error diagnostics - derive year from identifier when year column is empty - fix remaining 18 theses: Installation/Performance (slash→dash) orientation alias - csv importer: use column-name-based header detection instead of hardcoded positions
This commit is contained in:
6
TODO.md
6
TODO.md
@@ -75,10 +75,14 @@
|
|||||||
- [x] `templates/admin/edit.php` — moved `.admin-form-footer` from bottom to top-right, right after `<h1>`
|
- [x] `templates/admin/edit.php` — moved `.admin-form-footer` from bottom to top-right, right after `<h1>`
|
||||||
- [x] `admin.css` — added `.admin-form-footer--sticky` variant with `position:sticky; top:0; justify-content:flex-end`
|
- [x] `admin.css` — added `.admin-form-footer--sticky` variant with `position:sticky; top:0; justify-content:flex-end`
|
||||||
|
|
||||||
## Fix CSV importer robustness
|
## Fix CSV importer column shift and data repair
|
||||||
- [x] Pad rows to expected column count to avoid offset warnings from short rows
|
- [x] Pad rows to expected column count to avoid offset warnings from short rows
|
||||||
- [x] Distinguish `$yearRaw !== ''` before `intval()` to handle empty-year rows correctly
|
- [x] Distinguish `$yearRaw !== ''` before `intval()` to handle empty-year rows correctly
|
||||||
- [x] Improve missing-field error message: lists which fields are missing, includes identifier/title snippet
|
- [x] Improve missing-field error message: lists which fields are missing, includes identifier/title snippet
|
||||||
|
- [x] Derive year from identifier when year column is empty
|
||||||
|
- [x] Auto-detect column-shifted CSV: when orientation/finality columns are empty but synopsis/context match known orientation/finality names, remap on import
|
||||||
|
- [x] Migration `013_fix_csv_column_shift.sql`: move orientation from synopsis→orientation_id, finality from context_note→finality_id for already-imported theses
|
||||||
|
- [x] Migration `013_fix_remarks_keywords.php`: move keywords from remarks→tags+thesis_tags for already-imported theses
|
||||||
|
|
||||||
## Standardise répertoire filter column rendering
|
## Standardise répertoire filter column rendering
|
||||||
- [x] Centralise filter column rendering into a shared `repFilterEntry()` function
|
- [x] Centralise filter column rendering into a shared `repFilterEntry()` function
|
||||||
|
|||||||
30
app/migrations/applied/013_fix_csv_column_shift.sql
Normal file
30
app/migrations/applied/013_fix_csv_column_shift.sql
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
-- Fix theses that were imported with column-shifted CSV data.
|
||||||
|
-- Orientation names ended up in synopsis, finality names in context_note,
|
||||||
|
-- and keywords in remarks. Move them to the correct FK columns.
|
||||||
|
|
||||||
|
-- 1. Fix orientation_id from synopsis
|
||||||
|
UPDATE theses SET
|
||||||
|
orientation_id = (SELECT o.id FROM orientations o WHERE LOWER(o.name) = LOWER(theses.synopsis)),
|
||||||
|
synopsis = NULL
|
||||||
|
WHERE orientation_id IS NULL
|
||||||
|
AND synopsis IS NOT NULL
|
||||||
|
AND synopsis != ''
|
||||||
|
AND EXISTS (SELECT 1 FROM orientations o WHERE LOWER(o.name) = LOWER(theses.synopsis));
|
||||||
|
|
||||||
|
-- 2. Fix finality_id from context_note
|
||||||
|
UPDATE theses SET
|
||||||
|
finality_id = (SELECT ft.id FROM finality_types ft WHERE LOWER(ft.name) = LOWER(theses.context_note)),
|
||||||
|
context_note = NULL
|
||||||
|
WHERE finality_id IS NULL
|
||||||
|
AND context_note IS NOT NULL
|
||||||
|
AND context_note != ''
|
||||||
|
AND EXISTS (SELECT 1 FROM finality_types ft WHERE LOWER(ft.name) = LOWER(theses.context_note));
|
||||||
|
|
||||||
|
-- 3. Fix AP program from synopsis (if any synopsis values match AP names — edge case)
|
||||||
|
UPDATE theses SET
|
||||||
|
ap_program_id = (SELECT ap.id FROM ap_programs ap WHERE LOWER(ap.name) = LOWER(theses.synopsis)),
|
||||||
|
synopsis = NULL
|
||||||
|
WHERE ap_program_id IS NULL
|
||||||
|
AND synopsis IS NOT NULL
|
||||||
|
AND synopsis != ''
|
||||||
|
AND EXISTS (SELECT 1 FROM ap_programs ap WHERE LOWER(ap.name) = LOWER(theses.synopsis));
|
||||||
64
app/migrations/applied/013_fix_remarks_keywords.php
Normal file
64
app/migrations/applied/013_fix_remarks_keywords.php
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env php
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* Migrate keywords from theses.remarks → tags + thesis_tags.
|
||||||
|
*
|
||||||
|
* The CSV importer (before the column-shift fix) stored comma-separated
|
||||||
|
* keywords in the `remarks` column. This script extracts them and creates
|
||||||
|
* proper tag rows, then clears the remarks column.
|
||||||
|
*
|
||||||
|
* Run: php migrations/pending/013_fix_remarks_keywords.php
|
||||||
|
*/
|
||||||
|
|
||||||
|
require_once __DIR__ . '/../../src/Database.php';
|
||||||
|
|
||||||
|
$db = Database::getInstance();
|
||||||
|
$pdo = $db->getPDO();
|
||||||
|
|
||||||
|
// Fetch theses with non-empty remarks
|
||||||
|
$rows = $pdo->query(
|
||||||
|
"SELECT id, remarks FROM theses WHERE remarks IS NOT NULL AND remarks != ''"
|
||||||
|
)->fetchAll();
|
||||||
|
|
||||||
|
$insertTag = $pdo->prepare('INSERT OR IGNORE INTO tags (name) VALUES (?)');
|
||||||
|
$getTagId = $pdo->prepare('SELECT id FROM tags WHERE name = ?');
|
||||||
|
$insertLink = $pdo->prepare('INSERT OR IGNORE INTO thesis_tags (thesis_id, tag_id) VALUES (?, ?)');
|
||||||
|
$clearRemarks = $pdo->prepare('UPDATE theses SET remarks = NULL WHERE id = ?');
|
||||||
|
|
||||||
|
$pdo->beginTransaction();
|
||||||
|
|
||||||
|
try {
|
||||||
|
$migrated = 0;
|
||||||
|
foreach ($rows as $row) {
    $thesisId = (int)$row['id'];
    $raw = trim($row['remarks']);
    if ($raw === '') {
        // Whitespace-only remarks carry no keywords; just normalise to NULL.
        $clearRemarks->execute([$thesisId]);
        continue;
    }

    // Fragments that could not be turned into a tag are collected here and
    // written back to `remarks`, so the migration never discards text.
    $leftover = [];
    foreach (explode(',', $raw) as $kw) {
        $kw = trim($kw);
        if ($kw === '') continue;
        if (mb_strlen($kw) > 100) {
            // Too long to be a keyword: most likely a genuine remark.
            $leftover[] = $kw;
            continue;
        }

        // Create tag if needed, then link it to the thesis.
        $insertTag->execute([$kw]);
        $getTagId->execute([$kw]);
        $tagId = $getTagId->fetchColumn();
        if ($tagId) {
            $insertLink->execute([$thesisId, (int)$tagId]);
        } else {
            // Tag could not be resolved; keep the text rather than lose it.
            $leftover[] = $kw;
        }
    }

    if ($leftover === []) {
        $clearRemarks->execute([$thesisId]);
    } else {
        // Prepared lazily: only needed when something could not be migrated.
        $keepRemarks ??= $pdo->prepare('UPDATE theses SET remarks = ? WHERE id = ?');
        $keepRemarks->execute([implode(', ', $leftover), $thesisId]);
    }
    $migrated++;
}
|
||||||
|
|
||||||
|
$pdo->commit();
|
||||||
|
echo "Done. Migrated keywords for $migrated theses.\n";
|
||||||
|
} catch (Throwable $e) {
|
||||||
|
$pdo->rollBack();
|
||||||
|
echo "Error: " . $e->getMessage() . "\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
@@ -33,10 +33,87 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_FILES['csv_file'])) {
|
|||||||
$handle = fopen($_FILES['csv_file']['tmp_name'], 'r');
|
$handle = fopen($_FILES['csv_file']['tmp_name'], 'r');
|
||||||
if (!$handle) throw new Exception("Impossible d'ouvrir le fichier CSV.");
|
if (!$handle) throw new Exception("Impossible d'ouvrir le fichier CSV.");
|
||||||
|
|
||||||
fgetcsv($handle, 0, ',', '"', '');
|
// Scan up to 8 rows looking for a header row with known column names.
|
||||||
fgetcsv($handle, 0, ',', '"', '');
|
// Build colIdx[name] → position map; fall back to positional if header not found.
|
||||||
fgetcsv($handle, 0, ',', '"', '');
|
// Matching uses prefix + variant logic so "contact.visible" matches "contact",
|
||||||
fgetcsv($handle, 0, ',', '"', ''); // skip 4 header rows
|
// "promoteur·ice(s)" matches "promoteur", "Licence" matches "license", etc.
|
||||||
|
$colIdx = null;
|
||||||
|
$headerRowNum = 0;
|
||||||
|
$knownHeaders = [
|
||||||
|
'identifiant', 'titre', 'sous-titre', 'auteur', 'contact',
|
||||||
|
'promoteur', 'format', 'année', 'ap', 'orientation', 'finalité',
|
||||||
|
'mots-clés', 'synopsis', 'contexte', 'remarques', 'langue',
|
||||||
|
'autorisation', 'licence', 'license', 'taille', 'points', 'lien baiu',
|
||||||
|
];
|
||||||
|
for ($scan = 0; $scan < 8; $scan++) {
|
||||||
|
// The enclosure must be the single character ". In a single-quoted PHP string
// '\"' is two characters (backslash + quote), which fgetcsv() does not accept
// as a valid enclosure, so quoted fields would not be parsed correctly.
$hrow = fgetcsv($handle, 0, ',', '"', '');
|
||||||
|
if ($hrow === false) break;
|
||||||
|
$headerRowNum++;
|
||||||
|
// Known header names contain accented letters (année, finalité, mots-clés);
// use multibyte-aware lowercasing so upper-case variants such as "ANNÉE" match.
$normRow = array_map(fn($s) => mb_strtolower(trim((string)$s), 'UTF-8'), $hrow);
|
||||||
|
$hits = 0;
|
||||||
|
$map = [];
|
||||||
|
$used = [];
|
||||||
|
foreach ($knownHeaders as $h) {
|
||||||
|
foreach ($normRow as $pos => $cell) {
|
||||||
|
if (isset($used[$pos])) continue;
|
||||||
|
// Exact match
|
||||||
|
if ($cell === $h) { $hits++; $map[$h] = $pos; $used[$pos] = true; break; }
|
||||||
|
// Licence/License cross-match
|
||||||
|
if (($h === 'licence' && $cell === 'license') || ($h === 'license' && $cell === 'licence'))
|
||||||
|
{ $hits++; $map[$h] = $pos; $used[$pos] = true; break; }
|
||||||
|
// Prefix match (for compound headers like "contact.visible")
|
||||||
|
$hlen = strlen($h);
|
||||||
|
if ($hlen >= 4 && str_starts_with($cell, $h)) {
|
||||||
|
// Avoid short prefixes matching unrelated words
|
||||||
|
if ($hlen >= 5 || $cell === $h) { $hits++; $map[$h] = $pos; $used[$pos] = true; break; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Require at least 8 known headers to trust the row.
|
||||||
|
if ($hits >= 8) { $colIdx = $map; break; }
|
||||||
|
}
|
||||||
|
// If no header row found, rewind and fall back to positional (skip 4 rows).
|
||||||
|
if ($colIdx === null) {
|
||||||
|
rewind($handle);
|
||||||
|
$headerRowNum = 4;
|
||||||
|
// Skip the 4 legacy header rows with the same CSV dialect as the data rows
// (comma separator, " enclosure, no escape character) so multi-line quoted
// header cells are consumed exactly as the data parser would consume them.
for ($i = 0; $i < 4; $i++) fgetcsv($handle, 0, ',', '"', '');
|
||||||
|
} else {
|
||||||
|
// Consume blank/instruction/template rows between header and data.
|
||||||
|
// Stops when a row has a non-empty identifiant column that is not a
|
||||||
|
// template placeholder (e.g. "Column1") or instruction snippet.
|
||||||
|
$idPos = $colIdx['identifiant'] ?? 0;
|
||||||
|
$peekRow = null;
|
||||||
|
while (true) {
|
||||||
|
// Enclosure is the single character ": in single quotes '\"' would be the
// two-character string backslash + quote, which is not a valid enclosure.
$peek = fgetcsv($handle, 0, ',', '"', '');
|
||||||
|
if ($peek === false) break;
|
||||||
|
$headerRowNum++;
|
||||||
|
$val = trim((string)($peek[$idPos] ?? ''));
|
||||||
|
if ($val === '' || str_starts_with(strtolower($val), 'column')
|
||||||
|
|| str_contains(strtolower($val), 'éparer')) {
|
||||||
|
continue; // metadata row, skip
|
||||||
|
}
|
||||||
|
$peekRow = $peek;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper: get cell value by column name.
|
||||||
|
// When header was found: only use mapped column (returns '' if missing from header).
|
||||||
|
// When no header found: use positional fallback index.
|
||||||
|
$cell = function(array $row, string $name, int $fallbackPos) use ($colIdx): string {
|
||||||
|
if ($colIdx !== null) {
|
||||||
|
$pos = $colIdx[$name] ?? null;
|
||||||
|
if ($pos === null) {
|
||||||
|
// Try licence/license cross-lookup
|
||||||
|
if ($name === 'license') $pos = $colIdx['licence'] ?? null;
|
||||||
|
elseif ($name === 'licence') $pos = $colIdx['license'] ?? null;
|
||||||
|
}
|
||||||
|
if ($pos === null) return '';
|
||||||
|
} else {
|
||||||
|
$pos = $fallbackPos;
|
||||||
|
}
|
||||||
|
return isset($row[$pos]) ? trim((string)$row[$pos]) : '';
|
||||||
|
};
|
||||||
|
|
||||||
// Code → canonical name (legacy short-code CSV format)
|
// Code → canonical name (legacy short-code CSV format)
|
||||||
$orientationCodeMap = [
|
$orientationCodeMap = [
|
||||||
@@ -132,39 +209,48 @@ if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_FILES['csv_file'])) {
|
|||||||
return $r ? (int)$r['id'] : null;
|
return $r ? (int)$r['id'] : null;
|
||||||
};
|
};
|
||||||
|
|
||||||
$lineNumber = 5;
|
$lineNumber = $headerRowNum;
|
||||||
while (($row = fgetcsv($handle, 0, ',', '"', '')) !== false) {
|
// isset() is already false for null, so no extra null comparison is needed.
$usePeek = isset($peekRow);
// The peeked row was already counted in $headerRowNum while skipping metadata
// rows, and the loop below increments $lineNumber again before processing it.
// Step back once so reported line numbers are not off by one.
if ($usePeek) $lineNumber--;
|
||||||
|
while (true) {
|
||||||
|
if ($usePeek) {
|
||||||
|
$row = $peekRow;
|
||||||
|
$usePeek = false;
|
||||||
|
} else {
|
||||||
|
// Enclosure is the single character ": in single quotes '\"' would be the
// two-character string backslash + quote, which is not a valid enclosure.
$row = fgetcsv($handle, 0, ',', '"', '');
|
||||||
|
if ($row === false) break;
|
||||||
|
}
|
||||||
$lineNumber++;
|
$lineNumber++;
|
||||||
if (empty($row[0]) && empty($row[1])) continue;
|
// Skip blank rows. Cells are looked up by header name (positional fallback when
// no header row was detected) so remapped columns are honoured, and compared
// against '' so a literal "0" is not mistaken for an empty cell.
if ($cell($row, 'identifiant', 0) === '' && $cell($row, 'titre', 1) === '') continue;
|
||||||
try {
|
try {
|
||||||
$importDb->beginTransaction();
|
$importDb->beginTransaction();
|
||||||
|
|
||||||
// Pad row to expected column count to avoid offset warnings.
|
$identifier = $cell($row, 'identifiant', 0);
|
||||||
$expectedCols = 21;
|
$title = $cell($row, 'titre', 1);
|
||||||
while (count($row) < $expectedCols) $row[] = '';
|
$subtitle = $cell($row, 'sous-titre', 2);
|
||||||
|
$authorsRaw = $cell($row, 'auteur', 3);
|
||||||
$identifier = trim($row[0]);
|
$contact = $cell($row, 'contact', 4);
|
||||||
$title = trim($row[1]);
|
$supervisorsRaw = $cell($row, 'promoteur', 5);
|
||||||
$subtitle = trim($row[2]);
|
$formatsRaw = $cell($row, 'format', 6);
|
||||||
$authorsRaw = trim($row[3]);
|
$yearRaw = $cell($row, 'année', 7);
|
||||||
$contact = trim($row[4]);
|
|
||||||
$supervisorsRaw = trim($row[5]);
|
|
||||||
$formatsRaw = trim($row[6]);
|
|
||||||
$yearRaw = trim($row[7]);
|
|
||||||
$year = $yearRaw !== '' ? intval($yearRaw) : 0;
|
$year = $yearRaw !== '' ? intval($yearRaw) : 0;
|
||||||
$apCode = trim($row[8]);
|
// Fallback: derive year from identifier (e.g. "2024-003" → 2024)
|
||||||
$orientationCode = trim($row[9]);
|
if ($year === 0 && $identifier !== '' && preg_match('/^(\d{4})-/', $identifier, $m)) {
|
||||||
$finalityName = trim($row[10]);
|
$year = (int)$m[1];
|
||||||
$keywordsRaw = trim($row[11]);
|
}
|
||||||
$synopsis = trim($row[12]);
|
$apCode = $cell($row, 'ap', 8);
|
||||||
$context = trim($row[13]);
|
$orientationCode = $cell($row, 'orientation', 9);
|
||||||
$remarks = trim($row[14]);
|
$finalityName = $cell($row, 'finalité', 10);
|
||||||
$languageRaw = trim($row[15]);
|
$keywordsRaw = $cell($row, 'mots-clés', 11);
|
||||||
$access = trim($row[16]);
|
$synopsis = $cell($row, 'synopsis', 12);
|
||||||
$license = trim($row[17]);
|
$context = $cell($row, 'contexte', 13);
|
||||||
$sizeInfo = trim($row[18]);
|
$remarks = $cell($row, 'remarques', 14);
|
||||||
$juryPoints = !empty($row[19]) ? floatval($row[19]) : null;
|
$languageRaw = $cell($row, 'langue', 15);
|
||||||
$baiuLink = trim($row[20]);
|
$access = $cell($row, 'autorisation', 16);
|
||||||
|
$license = $cell($row, 'license', 17);
|
||||||
|
$sizeInfo = $cell($row, 'taille', 18);
|
||||||
|
$juryPointsRaw = $cell($row, 'points', 19);
|
||||||
|
$juryPoints = $juryPointsRaw !== '' ? floatval($juryPointsRaw) : null;
|
||||||
|
$baiuLink = $cell($row, 'lien baiu', 20);
|
||||||
|
|
||||||
if ($title === '' || $year === 0) {
|
if ($title === '' || $year === 0) {
|
||||||
$missing = [];
|
$missing = [];
|
||||||
|
|||||||
@@ -8,3 +8,5 @@
|
|||||||
{"timestamp":"2026-05-05T09:33:13+00:00","ip":"127.0.0.1","user_agent":"Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0","resource":"thesis","action":"csv_export","status":"success"}
|
{"timestamp":"2026-05-05T09:33:13+00:00","ip":"127.0.0.1","user_agent":"Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0","resource":"thesis","action":"csv_export","status":"success"}
|
||||||
{"timestamp":"2026-05-05T09:33:44+00:00","ip":"127.0.0.1","user_agent":"Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0","resource":"settings","action":"formulaire_update","status":"success","context":{"values":{"access_type_libre_enabled":"0","access_type_interne_enabled":"1","access_type_interdit_enabled":"1","restricted_files_enabled":"1"}}}
|
{"timestamp":"2026-05-05T09:33:44+00:00","ip":"127.0.0.1","user_agent":"Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0","resource":"settings","action":"formulaire_update","status":"success","context":{"values":{"access_type_libre_enabled":"0","access_type_interne_enabled":"1","access_type_interdit_enabled":"1","restricted_files_enabled":"1"}}}
|
||||||
{"timestamp":"2026-05-05T16:40:13+00:00","ip":"127.0.0.1","user_agent":"Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0","resource":"system","action":"delete_all_theses","status":"success","context":{"count":13}}
|
{"timestamp":"2026-05-05T16:40:13+00:00","ip":"127.0.0.1","user_agent":"Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0","resource":"system","action":"delete_all_theses","status":"success","context":{"count":13}}
|
||||||
|
{"timestamp":"2026-05-05T16:57:57+00:00","ip":"127.0.0.1","user_agent":"Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0","resource":"thesis","action":"publish","status":"success","context":{"count":15,"ids":[53,52,51,50,49,48,47,46,45,44,43,42,41,40,39]}}
|
||||||
|
{"timestamp":"2026-05-05T16:58:02+00:00","ip":"127.0.0.1","user_agent":"Mozilla/5.0 (X11; Linux x86_64; rv:150.0) Gecko/20100101 Firefox/150.0","resource":"thesis","action":"publish","status":"success","context":{"count":25,"ids":[178,177,176,175,174,173,172,171,170,169,168,167,166,165,164,163,162,161,160,159,158,157,156,155,154]}}
|
||||||
|
|||||||
Reference in New Issue
Block a user