<?php

namespace App\Services;

/**
 * In-memory reader for a numeric data set.
 *
 * The whole file is loaded once by the constructor and every value is cast to
 * float. Lines can then be consumed sequentially (getNextLine()) or in random
 * order without repetition (getRandomLine()); both draw from the same pool of
 * not-yet-read lines, which reset() refills.
 */
class DataSetReader
{
    /**
     * Every line of the data set, in file order. The last element of each line
     * is treated as the label (see getInputSize()).
     *
     * @var array<int, array<int, float>>
     */
    public array $lines = [];

    /**
     * Lines not consumed since the last reset(), keyed by their index in
     * $lines. Keys are never renumbered, so a key is always the position of
     * that line in the full list.
     *
     * @var array<int, array<int, float>>
     */
    private array $currentLines = [];

    /** Index in $lines of the most recently returned line, or -1 if none was read yet. */
    private int $lastReadLineIndex = -1;

    /**
     * Loads the whole file into memory and makes every line available for reading.
     *
     * @param string $filename Path handed to CsvReader.
     */
    public function __construct(
        public string $filename,
    ) {
        // For now, we only support CSV files, so we can delegate to CsvReader
        $csvReader = new CsvReader($filename);
        $this->readEntireFile($csvReader);
        $this->reset();
    }

    /**
     * Drains the reader into $lines, converting every value to float.
     */
    private function readEntireFile(CsvReader $reader): void
    {
        // NOTE(review): the loop ends on the first falsy value returned by
        // readNextLine(). If CsvReader can return an empty array for a blank
        // line, reading would stop there - confirm against CsvReader.
        while ($line = $reader->readNextLine()) {
            $newLine = [];
            foreach ($line as $value) {
                $newLine[] = (float) $value;
            }

            // A two-column line is taken to be a regression sample: append a
            // fake label of 0 so every line ends with a label.
            if (count($newLine) === 2) {
                $newLine[] = 0.0;
            }

            $this->lines[] = $newLine;
        }
    }

    /**
     * Returns a random line that has not been read since the last reset().
     *
     * @return array<int, float>|null The line, or null when no lines are left.
     */
    public function getRandomLine(): array | null
    {
        if ($this->currentLines === []) {
            return null; // No more lines to read
        }

        return $this->takeLine(array_rand($this->currentLines));
    }

    /**
     * Returns the first line, in file order, that has not been read since the
     * last reset().
     *
     * @return array<int, float>|null The line, or null when no lines are left.
     */
    public function getNextLine(): array | null
    {
        // Look at the first remaining key rather than key 0: getRandomLine()
        // may already have consumed line 0 while later lines are still pending.
        $index = array_key_first($this->currentLines);
        if ($index === null) {
            return null; // No more lines to read
        }

        return $this->takeLine($index);
    }

    /**
     * Number of input values per line, i.e. the width of the first line minus
     * its label.
     *
     * @throws \LogicException When the data set contains no lines.
     */
    public function getInputSize(): int
    {
        $firstLine = $this->lines[0] ?? throw new \LogicException(
            "Cannot determine the input size of an empty data set: {$this->filename}"
        );

        return count($firstLine) - 1; // Don't count the label
    }

    /**
     * Makes every line available for reading again. The last-read index is
     * left untouched.
     */
    public function reset(): void
    {
        // Plain copy: keys are preserved, so they keep matching $lines.
        $this->currentLines = $this->lines;
    }

    /**
     * Index in the full list of the line most recently returned by
     * getNextLine() or getRandomLine(); -1 before the first read.
     */
    public function getLastReadLineIndex(): int
    {
        return $this->lastReadLineIndex;
    }

    /**
     * Removes the pending line stored under $index, records it as the last
     * read line and returns it.
     *
     * The key itself is the position in $lines, so no search is needed and
     * duplicated lines are told apart correctly.
     *
     * @return array<int, float>
     */
    private function takeLine(int $index): array
    {
        $line = $this->currentLines[$index];
        unset($this->currentLines[$index]); // unset() keeps the other keys intact

        $this->lastReadLineIndex = $index;

        return $line;
    }
}
|