222 lines
6.4 KiB
PHP
222 lines
6.4 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Import;
|
|
|
|
/**
 * Reads delimited text files (CSV/TXT) and exposes their header row, data rows
 * and a short preview, converting every cell to UTF-8.
 *
 * Options understood by the public methods (all optional):
 *  - header_row     (int)    zero-based index of the header row, default 0
 *  - data_start_row (int)    zero-based index of the first data row, default 1 (parse() only)
 *  - delimiter      (string) field separator; auto-detected when absent
 *  - enclosure      (string) field enclosure, default '"'
 *  - escape         (string) escape character handed to fgetcsv(), default '\'
 *  - encoding       (string) source encoding; auto-detected when absent
 */
class CsvParser implements FileParserInterface
{
    /** Lower-case file extensions accepted by supports(). */
    protected static array $supportedExtensions = ['csv', 'txt'];

    /** Byte sequence of the UTF-8 byte order mark. */
    private const UTF8_BOM = "\xEF\xBB\xBF";

    /** Number of leading bytes sampled when guessing the encoding. */
    private const ENCODING_SAMPLE_BYTES = 10000;

    /** Number of leading lines sampled when guessing the delimiter. */
    private const DELIMITER_SAMPLE_LINES = 5;

    /**
     * Parse the whole file and return its header row and non-empty data rows.
     *
     * The row whose index equals `header_row` becomes the header; rows with an
     * index below `data_start_row` are skipped; rows whose cells are all null
     * or '' are dropped from the result.
     *
     * @param string $filePath Path of the file to read.
     * @param array  $options  See the class description.
     *
     * @return array{headers: array, data: array, total_rows: int, detected_delimiter: string, detected_encoding: string}
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    public function parse(string $filePath, array $options = []): array
    {
        $headerRow = $options['header_row'] ?? 0;
        $dataStartRow = $options['data_start_row'] ?? 1;
        $format = $this->resolveFormat($filePath, $options);

        $headers = [];
        $data = [];
        $rowIndex = 0;

        $handle = $this->openFile($filePath);

        try {
            while (($row = $this->readRow($handle, $format)) !== false) {
                $currentIndex = $rowIndex++;

                if ($currentIndex === $headerRow) {
                    $headers = $row;
                    continue;
                }

                if ($currentIndex < $dataStartRow) {
                    continue;
                }

                if ($this->hasContent($row)) {
                    $data[] = $row;
                }
            }
        } finally {
            // Always release the handle, even if a row conversion throws.
            fclose($handle);
        }

        return [
            'headers' => $headers,
            'data' => $data,
            'total_rows' => count($data),
            'detected_delimiter' => $format['delimiter'],
            'detected_encoding' => $format['encoding'],
        ];
    }

    /**
     * Return only the header row of the file, converted to UTF-8.
     *
     * @param string $filePath Path of the file to read.
     * @param array  $options  See the class description (`data_start_row` is ignored).
     *
     * @return array The header cells, or an empty array when the file has no row at `header_row`.
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    public function getHeaders(string $filePath, array $options = []): array
    {
        $headerRow = $options['header_row'] ?? 0;
        $format = $this->resolveFormat($filePath, $options);

        $handle = $this->openFile($filePath);

        try {
            for ($rowIndex = 0; ($row = $this->readRow($handle, $format)) !== false; $rowIndex++) {
                if ($rowIndex === $headerRow) {
                    return $row;
                }
            }
        } finally {
            fclose($handle);
        }

        return [];
    }

    /**
     * Return the first `$rows` records of the file together with basic statistics.
     *
     * `total_rows` is the number of physical lines in the file as read by
     * fgets(); it therefore includes the header line and blank lines.
     * `columns_count` is the number of cells in the first previewed record.
     *
     * @param string $filePath Path of the file to read.
     * @param int    $rows     Maximum number of records to include in the preview.
     * @param array  $options  See the class description (`header_row` and `data_start_row` are ignored).
     *
     * @return array{preview: array, total_rows: int, columns_count: int, detected_delimiter: string, detected_encoding: string}
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    public function getPreview(string $filePath, int $rows = 10, array $options = []): array
    {
        $format = $this->resolveFormat($filePath, $options);

        $preview = [];
        $totalRows = 0;

        $handle = $this->openFile($filePath);

        try {
            // First pass: count every physical line.
            while (fgets($handle) !== false) {
                $totalRows++;
            }

            // Second pass: go back to the start (past the BOM again) and read the preview.
            rewind($handle);
            $this->skipBom($handle);

            while (count($preview) < $rows && ($row = $this->readRow($handle, $format)) !== false) {
                $preview[] = [
                    'row_index' => count($preview),
                    'data' => $row,
                ];
            }
        } finally {
            fclose($handle);
        }

        return [
            'preview' => $preview,
            'total_rows' => $totalRows,
            'columns_count' => $preview !== [] ? count($preview[0]['data']) : 0,
            'detected_delimiter' => $format['delimiter'],
            'detected_encoding' => $format['encoding'],
        ];
    }

    /**
     * Guess the field delimiter from the first lines of the file.
     *
     * Counts each candidate (',', ';', tab, '|') in the sampled lines, ignoring
     * text inside double-quoted segments so that delimiters embedded in quoted
     * values do not skew the result. The most frequent candidate wins; ties go
     * to the earlier candidate, and ',' is returned when none occurs or the
     * file cannot be opened.
     *
     * @param string $filePath Path of the file to inspect.
     *
     * @return string The detected delimiter.
     */
    protected function detectDelimiter(string $filePath): string
    {
        $candidates = [',', ';', "\t", '|'];
        $counts = array_fill_keys($candidates, 0);

        $handle = fopen($filePath, 'r');
        if ($handle === false) {
            return ',';
        }

        try {
            for (
                $lines = 0;
                $lines < self::DELIMITER_SAMPLE_LINES && ($line = fgets($handle)) !== false;
                $lines++
            ) {
                // Drop balanced "..." segments; fall back to the raw line if PCRE fails.
                $unquoted = preg_replace('/"[^"]*"/', '', $line);
                if ($unquoted === null) {
                    $unquoted = $line;
                }

                foreach ($candidates as $candidate) {
                    $counts[$candidate] += substr_count($unquoted, $candidate);
                }
            }
        } finally {
            fclose($handle);
        }

        // Explicit first-wins selection: does not depend on sort stability.
        $best = ',';
        $bestCount = 0;
        foreach ($counts as $candidate => $count) {
            if ($count > $bestCount) {
                $best = $candidate;
                $bestCount = $count;
            }
        }

        return $best;
    }

    /**
     * Guess the file encoding from its leading bytes.
     *
     * A UTF-8 byte order mark short-circuits to 'UTF-8'. Otherwise the sample is
     * checked strictly against UTF-8, ISO-8859-1, Windows-1252 and ASCII.
     * 'UTF-8' is returned when nothing matches or the file cannot be read.
     *
     * @param string $filePath Path of the file to inspect.
     *
     * @return string The detected encoding name.
     */
    protected function detectEncoding(string $filePath): string
    {
        // Over-read by three bytes so that a multi-byte UTF-8 character cut by
        // the sample boundary can be completed instead of failing the strict check.
        $content = file_get_contents($filePath, false, null, 0, self::ENCODING_SAMPLE_BYTES + 3);

        if ($content === false) {
            return 'UTF-8';
        }

        if (strncmp($content, self::UTF8_BOM, 3) === 0) {
            return 'UTF-8';
        }

        if (strlen($content) > self::ENCODING_SAMPLE_BYTES) {
            $tail = substr($content, self::ENCODING_SAMPLE_BYTES);
            // Keep only the continuation bytes (0x80-0xBF) that finish the last character.
            preg_match('/\A[\x80-\xBF]*/', $tail, $continuation);
            $content = substr($content, 0, self::ENCODING_SAMPLE_BYTES) . ($continuation[0] ?? '');
        }

        $encoding = mb_detect_encoding($content, ['UTF-8', 'ISO-8859-1', 'Windows-1252', 'ASCII'], true);

        return $encoding ?: 'UTF-8';
    }

    /**
     * Tell whether this parser handles the given file extension (case-insensitive).
     *
     * @param string $extension Extension without the leading dot.
     *
     * @return bool True when the extension is listed in $supportedExtensions.
     */
    public static function supports(string $extension): bool
    {
        return in_array(strtolower($extension), self::$supportedExtensions, true);
    }

    /**
     * Resolve the reading format from the options, auto-detecting delimiter and
     * encoding only when the caller did not supply them.
     *
     * @return array{delimiter: string, enclosure: string, escape: string, encoding: string}
     */
    private function resolveFormat(string $filePath, array $options): array
    {
        return [
            'delimiter' => $options['delimiter'] ?? $this->detectDelimiter($filePath),
            'enclosure' => $options['enclosure'] ?? '"',
            // Same value as fgetcsv()'s historical default, passed explicitly.
            'escape' => $options['escape'] ?? '\\',
            'encoding' => $options['encoding'] ?? $this->detectEncoding($filePath),
        ];
    }

    /**
     * Open the file for reading and position the stream after a leading UTF-8 BOM.
     *
     * @return resource
     *
     * @throws \RuntimeException When the file cannot be opened.
     */
    private function openFile(string $filePath)
    {
        $handle = fopen($filePath, 'r');

        if ($handle === false) {
            throw new \RuntimeException("Could not open file: $filePath");
        }

        $this->skipBom($handle);

        return $handle;
    }

    /**
     * Consume a UTF-8 BOM at the current position of a stream positioned at its start.
     *
     * Non-seekable streams are left untouched because the probe bytes could not
     * be given back.
     *
     * @param resource $handle
     */
    private function skipBom($handle): void
    {
        $meta = stream_get_meta_data($handle);
        if (empty($meta['seekable'])) {
            return;
        }

        if (fread($handle, 3) !== self::UTF8_BOM) {
            rewind($handle);
        }
    }

    /**
     * Read the next CSV record and convert it to UTF-8.
     *
     * @param resource $handle
     * @param array    $format Format as returned by resolveFormat().
     *
     * @return array|false The record, or false at end of file.
     */
    private function readRow($handle, array $format)
    {
        $row = fgetcsv($handle, 0, $format['delimiter'], $format['enclosure'], $format['escape']);

        if ($row === false) {
            return false;
        }

        return $this->toUtf8($row, $format['encoding']);
    }

    /**
     * Convert the string cells of a record to UTF-8; other cells (the null that
     * fgetcsv() yields for a blank line) are kept as they are.
     *
     * @param array  $row
     * @param string $encoding Source encoding; 'UTF-8' means no conversion.
     */
    private function toUtf8(array $row, $encoding): array
    {
        if ($encoding === 'UTF-8') {
            return $row;
        }

        return array_map(
            static function ($value) use ($encoding) {
                return is_string($value) ? mb_convert_encoding($value, 'UTF-8', $encoding) : $value;
            },
            $row
        );
    }

    /**
     * Tell whether a record holds at least one cell that is neither null nor ''.
     */
    private function hasContent(array $row): bool
    {
        foreach ($row as $value) {
            if ($value !== null && $value !== '') {
                return true;
            }
        }

        return false;
    }
}
|