<?php

namespace Daylight\Connector2BA\Service;

use Daylight\Connector2BA\Mappers\ArtLevMapping;
use Daylight\Connector2BA\Models\DataItem;

/**
 * Streams a fixed-width text file and yields its records in batches.
 *
 * Each record is cut into fields according to a mapping selected by file name
 * (currently only `ArtLev.txt`, via ArtLevMapping::all()). Every mapping entry
 * is read through its `start` (1-based column), `length` and `key` elements.
 */
class PAB2Reader
{
    /** Single-byte encodings tried, in order, for lines that are not valid UTF-8. */
    private const LEGACY_ENCODINGS = ['Windows-1252', 'ISO-8859-1', 'ISO-8859-15'];

    /** Encoding assumed when none of the legacy candidates is detected. */
    private const FALLBACK_ENCODING = 'Windows-1252';

    /**
     * @param string $filePath Directory containing the file (a trailing "/" is tolerated).
     * @param string $fileName File name; also selects the field mapping.
     */
    public function __construct(
        protected string $filePath,
        protected string $fileName
    ) {
        // Only stores the location; nothing is read until readFile() is iterated.
    }

    /**
     * Reads the file line by line and yields lists of parsed records.
     *
     * Because this method is a generator, none of the code below (including the
     * exceptions) runs until the caller starts iterating the returned Generator.
     *
     * Every record is an array keyed by the mapping's `key` values (trimmed UTF-8
     * strings) plus `_line_number`, the 1-based physical line number in the file.
     * Empty lines are skipped silently; lines whose fields are all empty are
     * skipped with a warning.
     *
     * @param int $batchSize Maximum number of records per yielded batch; values
     *                       below 1 are treated as 1.
     *
     * @return \Generator<int, list<array<string, string|int>>>
     *
     * @throws \InvalidArgumentException When no field mapping exists for the file name.
     * @throws \RuntimeException         When the file is missing or cannot be opened.
     */
    public function readFile(int $batchSize = 100): \Generator
    {
        $fields = match ($this->fileName) {
            'ArtLev.txt' => ArtLevMapping::all(),
            default      => [],
        };

        if (empty($fields)) {
            throw new \InvalidArgumentException("No field mapping found for file: {$this->fileName}");
        }

        $fullPath = rtrim($this->filePath, '/') . '/' . $this->fileName;

        // is_file() rather than file_exists(): a directory with this name is not readable input.
        if (!is_file($fullPath)) {
            throw new \RuntimeException("File not found: {$fullPath}");
        }

        // "b" = binary mode, so no implicit newline/charset translation happens.
        $handle = fopen($fullPath, 'rb');
        if ($handle === false) {
            throw new \RuntimeException("Cannot open file for reading: {$fullPath}");
        }

        $batchSize  = max(1, $batchSize);
        $batch      = [];
        $lineNumber = 0;

        try {
            while (($line = fgets($handle)) !== false) {
                $lineNumber++;

                // A UTF-8 BOM can only appear at the very start of the file.
                if ($lineNumber === 1 && str_starts_with($line, "\xEF\xBB\xBF")) {
                    $line = substr($line, 3);
                }

                // Drop NUL bytes and the line terminator; other whitespace is kept
                // because it is significant for fixed-width columns.
                $rawLine = rtrim(str_replace("\x00", '', $line), "\r\n");

                if ($rawLine === '') {
                    continue;
                }

                // The encoding is decided per line: deciding once from the first
                // line misfires when that line happens to be pure ASCII.
                [$utf8Line, $encoding] = $this->decodeLine($rawLine);

                $data = $this->extractFields($utf8Line, $fields);

                // Sanity check: a record in which every field is empty carries no data.
                $hasContent = false;
                foreach ($data as $value) {
                    if ($value !== '') {
                        $hasContent = true;
                        break;
                    }
                }

                if (!$hasContent) {
                    // NOTE(review): no `use` statement for Log is visible in this file, so
                    // the unqualified name resolves inside the current namespace — confirm
                    // the intended logger class is actually reachable from here.
                    Log::warning('Skipping empty/invalid line after normalization', [
                        'file'        => $this->fileName,
                        'line_number' => $lineNumber,
                        'preview'     => mb_substr($utf8Line, 0, 200, 'UTF-8'),
                        'encoding'    => $encoding,
                    ]);
                    continue;
                }

                // Keep the source line number for downstream debugging.
                $data['_line_number'] = $lineNumber;
                $batch[] = $data;

                if (count($batch) >= $batchSize) {
                    yield $batch;
                    $batch = [];
                }
            }
        } finally {
            // Also runs when the consumer abandons the generator early.
            fclose($handle);
        }

        if ($batch !== []) {
            yield $batch;
        }
    }

    /**
     * Converts one raw line to UTF-8.
     *
     * A line that is already well-formed UTF-8 (which includes pure ASCII) is
     * returned unchanged. Anything else is treated as one of the single-byte
     * legacy encodings, so every source byte maps to exactly one character and
     * column positions are preserved.
     *
     * @return array{0: string, 1: string} The UTF-8 line and the encoding it was read as.
     */
    private function decodeLine(string $rawLine): array
    {
        if (mb_check_encoding($rawLine, 'UTF-8')) {
            return [$rawLine, 'UTF-8'];
        }

        $detected = mb_detect_encoding($rawLine, self::LEGACY_ENCODINGS, true);
        $encoding = $detected === false ? self::FALLBACK_ENCODING : $detected;

        // mb_convert_encoding() always emits well-formed UTF-8 (undecodable bytes
        // become the substitute character), so no further scrubbing is required.
        return [mb_convert_encoding($rawLine, 'UTF-8', $encoding), $encoding];
    }

    /**
     * Cuts a UTF-8 line into trimmed field values.
     *
     * Columns are counted in characters, not bytes, so a multi-byte character is
     * never split and does not shift the fields that follow it.
     *
     * @param string                          $utf8Line Line already converted to UTF-8.
     * @param iterable<array<string, mixed>>  $fields   Mapping entries with `start`, `length`, `key`.
     *
     * @return array<string, string>
     */
    private function extractFields(string $utf8Line, iterable $fields): array
    {
        $data = [];

        foreach ($fields as $field) {
            // Mapping positions are 1-based. Clamp so a start of 0 (or less) cannot
            // turn into a negative offset, which would count from the end of the line.
            $offset = max(0, (int) $field['start'] - 1);
            $length = max(0, (int) $field['length']);

            $slice = mb_substr($utf8Line, $offset, $length, 'UTF-8');

            // Strip control/format/unassigned characters; preg_replace() yields null
            // on a PCRE error, in which case the field is treated as empty.
            $clean = preg_replace('/[\p{C}\x1B]/u', '', $slice) ?? '';

            $data[$field['key']] = trim($clean);
        }

        return $data;
    }
}
