webmoney/backend/app/Services/Import/CsvParser.php

222 lines
6.4 KiB
PHP

<?php
namespace App\Services\Import;
/**
 * Reads delimiter-separated text files (CSV/TXT) row by row with fgetcsv().
 *
 * Options understood by the public read methods (all optional):
 *  - header_row     (int)    zero-based index of the header row, default 0
 *  - data_start_row (int)    zero-based index of the first data row, default 1
 *  - delimiter      (string) field separator; guessed from the file when absent
 *  - enclosure      (string) field enclosure character, default '"'
 *  - escape         (string) fgetcsv() escape character, default '\' (PHP's
 *                            historical default); pass '' to disable it
 *  - encoding       (string) source encoding; guessed from the file when absent
 *
 * All returned cell values are converted to UTF-8 unless the source encoding
 * is already 'UTF-8'.
 */
class CsvParser implements FileParserInterface
{
    /** Lower-case file extensions (without the dot) accepted by supports(). */
    protected static array $supportedExtensions = ['csv', 'txt'];

    /** UTF-8 byte-order mark that some editors prepend to exported files. */
    private const UTF8_BOM = "\xEF\xBB\xBF";

    /** Number of leading bytes sampled when guessing the encoding. */
    private const ENCODING_SAMPLE_BYTES = 10000;

    /** Number of leading lines sampled when guessing the delimiter. */
    private const DELIMITER_SAMPLE_LINES = 5;

    /**
     * Parse the whole file and return its header row and non-empty data rows.
     *
     * Rows before `data_start_row` (other than the header row) are skipped, and
     * rows whose cells are all null or '' are dropped from the result.
     *
     * @param string $filePath Path of the file to read.
     * @param array  $options  See the class docblock.
     *
     * @return array{headers: array, data: array, total_rows: int, detected_delimiter: string, detected_encoding: string}
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    public function parse(string $filePath, array $options = []): array
    {
        // Cast so numeric strings such as "0" still match the integer row counter
        // in the strict comparison below.
        $headerRow = (int) ($options['header_row'] ?? 0);
        $dataStartRow = (int) ($options['data_start_row'] ?? 1);
        $delimiter = $options['delimiter'] ?? $this->detectDelimiter($filePath);
        $enclosure = $options['enclosure'] ?? '"';
        $escape = $options['escape'] ?? '\\';
        $encoding = $options['encoding'] ?? $this->detectEncoding($filePath);

        $headers = [];
        $data = [];
        $rowIndex = 0;

        $handle = $this->openFile($filePath);
        try {
            while (($row = fgetcsv($handle, 0, $delimiter, $enclosure, $escape)) !== false) {
                $currentIndex = $rowIndex++;
                $row = $this->convertRow($row, $encoding);

                if ($currentIndex === $headerRow) {
                    $headers = $row;
                    continue;
                }

                if ($currentIndex < $dataStartRow) {
                    continue;
                }

                if (!self::isBlankRow($row)) {
                    $data[] = $row;
                }
            }
        } finally {
            // Release the handle even if a conversion or read error is thrown.
            fclose($handle);
        }

        return [
            'headers' => $headers,
            'data' => $data,
            'total_rows' => count($data),
            'detected_delimiter' => $delimiter,
            'detected_encoding' => $encoding,
        ];
    }

    /**
     * Return only the header row of the file, converted to UTF-8.
     *
     * @param string $filePath Path of the file to read.
     * @param array  $options  See the class docblock (`header_row`, `delimiter`,
     *                         `enclosure`, `escape`, `encoding` are used).
     *
     * @return array The header cells, or an empty array when the file has fewer
     *               rows than `header_row` requires.
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    public function getHeaders(string $filePath, array $options = []): array
    {
        $headerRow = (int) ($options['header_row'] ?? 0);
        $delimiter = $options['delimiter'] ?? $this->detectDelimiter($filePath);
        $enclosure = $options['enclosure'] ?? '"';
        $escape = $options['escape'] ?? '\\';
        $encoding = $options['encoding'] ?? $this->detectEncoding($filePath);

        $headers = [];

        $handle = $this->openFile($filePath);
        try {
            $rowIndex = 0;
            while (($row = fgetcsv($handle, 0, $delimiter, $enclosure, $escape)) !== false) {
                if ($rowIndex === $headerRow) {
                    $headers = $row;
                    break;
                }
                $rowIndex++;
            }
        } finally {
            fclose($handle);
        }

        return $this->convertRow($headers, $encoding);
    }

    /**
     * Return the first `$rows` rows of the file together with basic metadata.
     *
     * Note that `total_rows` is the number of physical lines in the file (counted
     * with fgets()), so it includes the header line and blank lines, and a quoted
     * field that spans several lines is counted once per line.
     *
     * @param string $filePath Path of the file to read.
     * @param int    $rows     Maximum number of rows to include in the preview.
     * @param array  $options  See the class docblock (`delimiter`, `enclosure`,
     *                         `escape`, `encoding` are used).
     *
     * @return array{preview: array, total_rows: int, columns_count: int, detected_delimiter: string, detected_encoding: string}
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    public function getPreview(string $filePath, int $rows = 10, array $options = []): array
    {
        $delimiter = $options['delimiter'] ?? $this->detectDelimiter($filePath);
        $enclosure = $options['enclosure'] ?? '"';
        $escape = $options['escape'] ?? '\\';
        $encoding = $options['encoding'] ?? $this->detectEncoding($filePath);

        $preview = [];
        $totalRows = 0;

        $handle = $this->openFile($filePath);
        try {
            // Remember where the data starts (after an optional BOM) so the
            // second pass can return to exactly this position.
            $dataStart = ftell($handle);

            // First pass: count every physical line.
            while (fgets($handle) !== false) {
                $totalRows++;
            }

            // Second pass: read the preview rows from the start of the data.
            fseek($handle, $dataStart === false ? 0 : $dataStart);

            while (
                count($preview) < $rows
                && ($row = fgetcsv($handle, 0, $delimiter, $enclosure, $escape)) !== false
            ) {
                $preview[] = [
                    'row_index' => count($preview),
                    'data' => $this->convertRow($row, $encoding),
                ];
            }
        } finally {
            fclose($handle);
        }

        return [
            'preview' => $preview,
            'total_rows' => $totalRows,
            'columns_count' => $preview === [] ? 0 : count($preview[0]['data']),
            'detected_delimiter' => $delimiter,
            'detected_encoding' => $encoding,
        ];
    }

    /**
     * Guess the field delimiter by counting candidate characters in the first
     * lines of the file.
     *
     * The candidate with the highest total count wins; on a tie (including a
     * file that contains none of the candidates) the earliest candidate in the
     * list is returned, so the default is ','. Characters inside quoted fields
     * are counted too, so this is a heuristic only.
     *
     * @param string $filePath Path of the file to inspect.
     *
     * @return string One of ',', ';', "\t" or '|'; ',' when the file cannot be opened.
     */
    protected function detectDelimiter(string $filePath): string
    {
        $candidates = [',', ';', "\t", '|'];

        $handle = fopen($filePath, 'r');
        if ($handle === false) {
            // Best effort: the caller reports the unreadable file itself.
            return $candidates[0];
        }

        $counts = array_fill_keys($candidates, 0);
        try {
            for (
                $lineNo = 0;
                $lineNo < self::DELIMITER_SAMPLE_LINES && ($line = fgets($handle)) !== false;
                $lineNo++
            ) {
                foreach ($candidates as $candidate) {
                    $counts[$candidate] += substr_count($line, $candidate);
                }
            }
        } finally {
            fclose($handle);
        }

        // Strictly-greater comparison keeps the result independent of how the
        // sort implementation orders equal counts.
        $best = $candidates[0];
        foreach ($candidates as $candidate) {
            if ($counts[$candidate] > $counts[$best]) {
                $best = $candidate;
            }
        }

        return $best;
    }

    /**
     * Guess the text encoding from the first bytes of the file.
     *
     * @param string $filePath Path of the file to inspect.
     *
     * @return string An encoding name accepted by mbstring; 'UTF-8' when the file
     *                is empty, unreadable, starts with a UTF-8 BOM, or when
     *                detection fails.
     */
    protected function detectEncoding(string $filePath): string
    {
        $sample = file_get_contents($filePath, false, null, 0, self::ENCODING_SAMPLE_BYTES);
        if ($sample === false || $sample === '') {
            return 'UTF-8';
        }

        // A UTF-8 byte-order mark settles the question.
        if (strncmp($sample, self::UTF8_BOM, strlen(self::UTF8_BOM)) === 0) {
            return 'UTF-8';
        }

        // A full-size sample may end in the middle of a multi-byte UTF-8
        // character, which would make strict detection reject UTF-8 for a
        // perfectly valid file. Drop a trailing lead byte and the continuation
        // bytes that follow it before detecting.
        if (strlen($sample) === self::ENCODING_SAMPLE_BYTES) {
            $sample = preg_replace('/[\xC0-\xFF][\x80-\xBF]{0,3}\z/', '', $sample) ?? $sample;
        }

        $encoding = mb_detect_encoding($sample, ['UTF-8', 'ISO-8859-1', 'Windows-1252', 'ASCII'], true);

        return $encoding ?: 'UTF-8';
    }

    /**
     * Check whether this parser handles the given file extension.
     *
     * @param string $extension Extension without the leading dot; case-insensitive.
     */
    public static function supports(string $extension): bool
    {
        return in_array(strtolower($extension), self::$supportedExtensions, true);
    }

    /**
     * Open the file for reading and position the handle after a UTF-8 BOM, if
     * one is present, so the BOM does not end up inside the first cell.
     *
     * @param string $filePath Path of the file to open.
     *
     * @return resource Open read handle; the caller must fclose() it.
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    private function openFile(string $filePath)
    {
        $handle = fopen($filePath, 'r');
        if ($handle === false) {
            throw new \RuntimeException("Could not open file: $filePath");
        }

        // Only peek when we can seek back; otherwise the bytes would be lost.
        $meta = stream_get_meta_data($handle);
        if (!empty($meta['seekable']) && fread($handle, strlen(self::UTF8_BOM)) !== self::UTF8_BOM) {
            rewind($handle);
        }

        return $handle;
    }

    /**
     * Convert every string cell of a row to UTF-8.
     *
     * Non-string cells are passed through untouched: fgetcsv() yields a single
     * null cell for a blank line, and passing null to mb_convert_encoding() is
     * deprecated since PHP 8.1.
     *
     * @param array  $row      Cells as returned by fgetcsv().
     * @param string $encoding Source encoding of the cells.
     *
     * @return array The row with string cells converted; unchanged when the
     *               source encoding is already 'UTF-8'.
     */
    private function convertRow(array $row, string $encoding): array
    {
        if ($encoding === 'UTF-8') {
            return $row;
        }

        return array_map(
            static fn ($value) => is_string($value)
                ? mb_convert_encoding($value, 'UTF-8', $encoding)
                : $value,
            $row
        );
    }

    /**
     * Tell whether every cell of the row is null or an empty string.
     *
     * @param array $row Cells as returned by fgetcsv().
     */
    private static function isBlankRow(array $row): bool
    {
        foreach ($row as $value) {
            if ($value !== null && $value !== '') {
                return false;
            }
        }

        return true;
    }
}