<?php
/**
 * Random Forest
 * Implementasi sederhana algoritma Random Forest untuk prediksi
 * TIDAK memerlukan library eksternal, menggunakan PHP murni
 */

/**
 * One node of a binary decision tree.
 *
 * An internal node carries a feature index and a threshold plus its two
 * children; a leaf node carries only the value it predicts.
 */
class DecisionTreeNode {
    /** @var mixed Index of the feature compared against $threshold (internal nodes). */
    public $featureIndex = null;

    /** @var mixed Split value; samples with feature <= threshold are routed to $left. */
    public $threshold = null;

    /** @var DecisionTreeNode|null Child node for samples at or below the threshold. */
    public $left = null;

    /** @var DecisionTreeNode|null Child node for samples above the threshold. */
    public $right = null;

    /** @var mixed Prediction stored on a leaf node. */
    public $value = null;

    /** @var bool Whether this node is a leaf. */
    public $isLeaf = false;

    /**
     * Creates a node that starts out as an internal (non-leaf) node.
     */
    public function __construct() {
        $this->isLeaf = false;
    }
}

/**
 * Regression tree that predicts a numeric target by recursively splitting the
 * training samples on one feature/threshold pair at a time.
 *
 * A split is scored by the size-weighted mean squared error of its two
 * children; a leaf predicts the mean target of the samples it holds.
 */
class RegressionTree {
    /** Upper bound on the number of candidate thresholds evaluated per feature. */
    private const MAX_THRESHOLDS_PER_FEATURE = 10;

    private $root;
    private $maxDepth;
    private $minSamplesSplit;
    private $minSamplesLeaf;

    /**
     * @param int|null $maxDepth        Maximum tree depth; null means unlimited
     * @param int      $minSamplesSplit Minimum number of samples a node needs to be split
     * @param int      $minSamplesLeaf  Minimum number of samples each child must receive
     */
    public function __construct($maxDepth = null, $minSamplesSplit = 2, $minSamplesLeaf = 1) {
        $this->maxDepth = $maxDepth;
        $this->minSamplesSplit = $minSamplesSplit;
        $this->minSamplesLeaf = $minSamplesLeaf;
    }

    /**
     * Fits the tree to the training data.
     *
     * @param array $X Feature rows (each row a list of numeric features)
     * @param array $y Target values, one per row of $X
     * @throws InvalidArgumentException When $X is empty or $X and $y differ in length
     */
    public function train($X, $y) {
        if (!is_array($X) || !is_array($y) || count($X) === 0 || count($X) !== count($y)) {
            throw new InvalidArgumentException(
                'RegressionTree::train() requires non-empty $X and $y of equal length'
            );
        }

        // The builder addresses samples by position 0..n-1, so drop any custom keys.
        $this->root = $this->buildTree(array_values($X), array_values($y), 0);
    }

    /**
     * Recursively builds the subtree for the given samples.
     *
     * @param array $X     Feature rows, indexed 0..n-1
     * @param array $y     Target values, indexed 0..n-1
     * @param int   $depth Depth of the node being built (root is 0)
     * @return DecisionTreeNode Root of the built subtree
     */
    private function buildTree($X, $y, $depth) {
        $node = new DecisionTreeNode();

        // Stop when the node is too small, too deep, or already pure.
        $depthExhausted = $this->maxDepth !== null && $depth >= $this->maxDepth;
        if (count($X) < $this->minSamplesSplit || $depthExhausted || $this->allSame($y)) {
            return $this->makeLeaf($node, $y);
        }

        // getBestSplit() only returns splits that respect minSamplesLeaf.
        $bestSplit = $this->getBestSplit($X, $y);
        if ($bestSplit === null) {
            return $this->makeLeaf($node, $y);
        }

        $node->featureIndex = $bestSplit['feature_index'];
        $node->threshold = $bestSplit['threshold'];

        $leftIndices = $bestSplit['left_indices'];
        $rightIndices = $bestSplit['right_indices'];

        $node->left = $this->buildTree(
            $this->getSubset($X, $leftIndices),
            $this->getSubset($y, $leftIndices),
            $depth + 1
        );
        $node->right = $this->buildTree(
            $this->getSubset($X, $rightIndices),
            $this->getSubset($y, $rightIndices),
            $depth + 1
        );

        return $node;
    }

    /**
     * Turns a node into a leaf predicting the mean of the given targets.
     *
     * @param DecisionTreeNode $node Node to convert
     * @param array            $y    Targets of the samples that reached the node
     * @return DecisionTreeNode The same node, now flagged as a leaf
     */
    private function makeLeaf($node, $y) {
        $node->isLeaf = true;
        $node->value = $this->calculateLeafValue($y);
        return $node;
    }

    /**
     * Finds the split with the lowest weighted MSE among the candidate
     * thresholds of every feature.
     *
     * Splits that would leave either child with fewer than minSamplesLeaf
     * samples are skipped, so one undersized candidate cannot hide a valid one.
     *
     * @param array $X Feature rows, indexed 0..n-1
     * @param array $y Target values, indexed 0..n-1
     * @return array|null Keys: feature_index, threshold, left_indices, right_indices;
     *                    null when no admissible split exists
     */
    private function getBestSplit($X, $y) {
        $numSamples = count($X);
        $numFeatures = count($X[0]);
        // Both children must always be non-empty, hence the floor of 1.
        $minLeaf = max(1, (int) $this->minSamplesLeaf);

        $bestMse = PHP_FLOAT_MAX;
        $bestSplit = null;

        for ($featureIndex = 0; $featureIndex < $numFeatures; $featureIndex++) {
            $thresholds = $this->candidateThresholds(array_column($X, $featureIndex));

            foreach ($thresholds as $threshold) {
                $leftIndices = [];
                $rightIndices = [];
                for ($j = 0; $j < $numSamples; $j++) {
                    if ($X[$j][$featureIndex] <= $threshold) {
                        $leftIndices[] = $j;
                    } else {
                        $rightIndices[] = $j;
                    }
                }

                if (count($leftIndices) < $minLeaf || count($rightIndices) < $minLeaf) {
                    continue;
                }

                $leftY = $this->getSubset($y, $leftIndices);
                $rightY = $this->getSubset($y, $rightIndices);

                $leftMse = $this->calculateMse($leftY, array_sum($leftY) / count($leftY));
                $rightMse = $this->calculateMse($rightY, array_sum($rightY) / count($rightY));
                $weightedMse = (count($leftY) * $leftMse + count($rightY) * $rightMse) / $numSamples;

                if ($weightedMse < $bestMse) {
                    $bestMse = $weightedMse;
                    $bestSplit = [
                        'feature_index' => $featureIndex,
                        'threshold' => $threshold,
                        'left_indices' => $leftIndices,
                        'right_indices' => $rightIndices
                    ];
                }
            }
        }

        return $bestSplit;
    }

    /**
     * Picks up to MAX_THRESHOLDS_PER_FEATURE split thresholds for one feature.
     *
     * Thresholds are midpoints between neighbouring distinct values, spread
     * evenly over the sorted value range. The distinct values are sorted and
     * re-indexed first because array_unique() keeps the original keys, which
     * would otherwise leave gaps when addressing them by position.
     *
     * @param array $featureValues Values of one feature across all samples
     * @return array Candidate thresholds; empty when the feature is constant
     */
    private function candidateThresholds($featureValues) {
        $uniqueValues = array_unique($featureValues, SORT_NUMERIC);
        sort($uniqueValues, SORT_NUMERIC);

        // A "gap" is the space between two neighbouring distinct values.
        $numGaps = count($uniqueValues) - 1;
        if ($numGaps < 1) {
            return [];
        }

        $numCandidates = min($numGaps, self::MAX_THRESHOLDS_PER_FEATURE);
        $thresholds = [];
        for ($i = 0; $i < $numCandidates; $i++) {
            // Centre of the i-th of $numCandidates equal slices of the gap range.
            $gap = (int) ((2 * $i + 1) * $numGaps / (2 * $numCandidates));
            $thresholds[] = ($uniqueValues[$gap] + $uniqueValues[$gap + 1]) / 2;
        }

        return $thresholds;
    }

    /**
     * Predicts targets with the trained tree.
     *
     * @param array $X Either one sample (flat list of features) or a list of samples
     * @return array|float|int A single prediction for one sample, otherwise a list
     *                         of predictions; an empty array for empty input
     * @throws RuntimeException When called before train()
     */
    public function predict($X) {
        if ($this->root === null) {
            throw new RuntimeException('RegressionTree::predict() called before train()');
        }

        if (empty($X)) {
            return [];
        }

        // A flat list of scalars is a single sample rather than a batch.
        if (!is_array(reset($X))) {
            return $this->traverseTree($this->root, $X);
        }

        $predictions = [];
        foreach ($X as $sample) {
            $predictions[] = $this->traverseTree($this->root, $sample);
        }

        return $predictions;
    }

    /**
     * Walks from the given node down to a leaf for one sample.
     *
     * @param DecisionTreeNode $node Node to start from
     * @param array            $x    Sample (list of features)
     * @return float|int Value stored on the leaf that was reached
     */
    private function traverseTree($node, $x) {
        while (!$node->isLeaf) {
            $node = $x[$node->featureIndex] <= $node->threshold ? $node->left : $node->right;
        }

        return $node->value;
    }

    /**
     * Mean squared deviation of the values from the given mean.
     *
     * @param array $y    Target values (non-empty)
     * @param float $mean Mean to measure the deviation from
     * @return float Mean squared error
     */
    private function calculateMse($y, $mean) {
        $sumSquared = 0.0;
        foreach ($y as $value) {
            $diff = $value - $mean;
            $sumSquared += $diff * $diff;
        }
        return $sumSquared / count($y);
    }

    /**
     * Collects the elements of $data at the given positions, in order.
     *
     * @param array $data    Source array
     * @param array $indices Positions to pick
     * @return array Picked elements, re-indexed from 0
     */
    private function getSubset($data, $indices) {
        $subset = [];
        foreach ($indices as $index) {
            $subset[] = $data[$index];
        }
        return $subset;
    }

    /**
     * Tells whether every value in the array equals the first one.
     *
     * @param array $arr Values to compare
     * @return bool True when all values are equal
     */
    private function allSame($arr) {
        $first = reset($arr);
        foreach ($arr as $value) {
            if ($value != $first) {
                return false;
            }
        }
        return true;
    }

    /**
     * Value predicted by a leaf: the mean of its targets.
     *
     * @param array $y Target values (non-empty)
     * @return float|int Mean of the targets
     */
    private function calculateLeafValue($y) {
        return array_sum($y) / count($y);
    }
}

/**
 * Random forest regressor: an ensemble of RegressionTree instances, each
 * trained on a bootstrap sample, whose predictions are averaged.
 */
class RandomForestRegressor {
    private $trees = [];
    private $nEstimators;
    private $maxDepth;
    private $minSamplesSplit;
    private $minSamplesLeaf;

    /**
     * @param int      $nEstimators     Number of trees in the forest
     * @param int|null $maxDepth        Maximum depth of each tree; null means unlimited
     * @param int      $minSamplesSplit Minimum number of samples a node needs to be split
     * @param int      $minSamplesLeaf  Minimum number of samples per leaf
     */
    public function __construct($nEstimators = 100, $maxDepth = null, $minSamplesSplit = 2, $minSamplesLeaf = 1) {
        $this->nEstimators = $nEstimators;
        $this->maxDepth = $maxDepth;
        $this->minSamplesSplit = $minSamplesSplit;
        $this->minSamplesLeaf = $minSamplesLeaf;
    }

    /**
     * Trains the forest, replacing any previously trained trees.
     *
     * @param array $X Feature rows
     * @param array $y Target values, one per row of $X
     * @throws InvalidArgumentException When $X is empty or $X and $y differ in length
     */
    public function train($X, $y) {
        if (!is_array($X) || !is_array($y) || count($X) === 0 || count($X) !== count($y)) {
            throw new InvalidArgumentException(
                'RandomForestRegressor::train() requires non-empty $X and $y of equal length'
            );
        }

        // Bootstrap indices are positions 0..n-1, so drop any custom keys.
        $X = array_values($X);
        $y = array_values($y);
        $numSamples = count($X);

        $this->trees = [];

        for ($i = 0; $i < $this->nEstimators; $i++) {
            // Each tree sees its own sample drawn with replacement.
            $bootstrapIndices = $this->bootstrap($numSamples);

            $tree = new RegressionTree($this->maxDepth, $this->minSamplesSplit, $this->minSamplesLeaf);
            $tree->train(
                $this->getSubset($X, $bootstrapIndices),
                $this->getSubset($y, $bootstrapIndices)
            );

            $this->trees[] = $tree;
        }
    }

    /**
     * Predicts targets as the average of all trees' predictions.
     *
     * @param array $X Either one sample (flat list of features) or a list of samples
     * @return array|float A single averaged prediction for one sample, otherwise a
     *                     list of averaged predictions; an empty array when the forest
     *                     has no trees or the input is empty
     */
    public function predict($X) {
        if (empty($this->trees) || empty($X)) {
            return [];
        }

        // A flat list of scalars is a single sample; wrap it so both cases share one path.
        $isSingleSample = !is_array(reset($X));
        $samples = $isSingleSample ? [$X] : array_values($X);

        // Accumulate per-sample sums, running each tree once over the whole batch.
        $sums = array_fill(0, count($samples), 0.0);
        foreach ($this->trees as $tree) {
            foreach ($tree->predict($samples) as $i => $prediction) {
                $sums[$i] += $prediction;
            }
        }

        $numTrees = count($this->trees);
        $averages = [];
        foreach ($sums as $sum) {
            $averages[] = $sum / $numTrees;
        }

        return $isSingleSample ? $averages[0] : $averages;
    }

    /**
     * Draws $numSamples positions uniformly from 0..$numSamples-1 with replacement.
     *
     * @param int $numSamples Size of the data set (must be at least 1)
     * @return array Drawn positions
     */
    private function bootstrap($numSamples) {
        $indices = [];
        for ($i = 0; $i < $numSamples; $i++) {
            $indices[] = rand(0, $numSamples - 1);
        }
        return $indices;
    }

    /**
     * Collects the elements of $data at the given positions, in order.
     *
     * @param array $data    Source array
     * @param array $indices Positions to pick
     * @return array Picked elements, re-indexed from 0
     */
    private function getSubset($data, $indices) {
        $subset = [];
        foreach ($indices as $index) {
            $subset[] = $data[$index];
        }
        return $subset;
    }
}

/**
 * Computes the Mean Absolute Error (MAE) between true and predicted values.
 *
 * Values are paired by position, so array keys are ignored.
 *
 * @param array $yTrue Actual values
 * @param array $yPred Predicted values, one per actual value
 * @return float MAE
 * @throws InvalidArgumentException When the inputs are empty or differ in length
 */
function meanAbsoluteError($yTrue, $yPred) {
    $n = count($yTrue);
    // Guard explicitly: an empty input would otherwise divide by zero below.
    if ($n === 0 || $n !== count($yPred)) {
        throw new InvalidArgumentException(
            'meanAbsoluteError() requires non-empty arrays of equal length'
        );
    }

    $yTrue = array_values($yTrue);
    $yPred = array_values($yPred);

    $sum = 0.0;
    for ($i = 0; $i < $n; $i++) {
        $sum += abs($yTrue[$i] - $yPred[$i]);
    }

    return $sum / $n;
}

/**
 * Computes the Mean Squared Error (MSE) between true and predicted values.
 *
 * Values are paired by position, so array keys are ignored.
 *
 * @param array $yTrue Actual values
 * @param array $yPred Predicted values, one per actual value
 * @return float MSE
 * @throws InvalidArgumentException When the inputs are empty or differ in length
 */
function meanSquaredError($yTrue, $yPred) {
    $n = count($yTrue);
    // Guard explicitly: an empty input would otherwise divide by zero below.
    if ($n === 0 || $n !== count($yPred)) {
        throw new InvalidArgumentException(
            'meanSquaredError() requires non-empty arrays of equal length'
        );
    }

    $yTrue = array_values($yTrue);
    $yPred = array_values($yPred);

    $sum = 0.0;
    for ($i = 0; $i < $n; $i++) {
        $diff = $yTrue[$i] - $yPred[$i];
        $sum += $diff * $diff;
    }

    return $sum / $n;
}

/**
 * Builds sliding-window features and targets from a price series.
 *
 * For every row from position $lookback onward, the feature vector holds the
 * 'close' values of the $lookback preceding rows (most recent first) and the
 * target is the row's own 'close'. Rows are read by position, so array keys
 * of $data are ignored.
 *
 * @param array $data     Rows containing at least a 'close' entry and, optionally, a 'date'
 * @param int   $lookback Number of preceding rows used as features (values below 1 are treated as 1)
 * @return array Keys 'features', 'targets' and 'dates', all of equal length; all
 *               empty when $data has $lookback rows or fewer. A row without a
 *               'date' entry contributes null so dates stay aligned with targets.
 */
function prepareFeatures($data, $lookback = 5) {
    $lookback = max(1, (int) $lookback);
    $rows = array_values($data);
    $numRows = count($rows);

    $features = [];
    $targets = [];
    $dates = [];

    // With $numRows <= $lookback the loop never runs and all three lists stay empty.
    for ($i = $lookback; $i < $numRows; $i++) {
        $window = [];
        for ($j = 1; $j <= $lookback; $j++) {
            $window[] = (float) $rows[$i - $j]['close'];
        }

        $features[] = $window;
        $targets[] = (float) $rows[$i]['close'];
        // Collected per row (not via array_column) so a missing date cannot shift the list.
        $dates[] = $rows[$i]['date'] ?? null;
    }

    return [
        'features' => $features,
        'targets' => $targets,
        'dates' => $dates
    ];
}

/**
 * Runs the full Random Forest workflow on a price series: builds features,
 * splits them chronologically into training and testing parts, trains the
 * model, predicts the testing part and computes error metrics.
 *
 * @param array $data     Price rows as accepted by prepareFeatures()
 * @param array $settings Model settings: 'train_test_ratio' (fraction of rows used
 *                        for training), 'n_estimators' (capped at 20), 'max_depth',
 *                        'min_samples_split', 'min_samples_leaf'. Missing model
 *                        settings fall back to 100 / null / 2 / 1 respectively.
 * @return array|false Keys 'predictions' (list of date / actual_price /
 *                     predicted_price rows) and 'metrics' (mae, rmse); false when
 *                     there is too little data, the split leaves an empty training
 *                     or testing part, or any error occurs
 */
function runRandomForest($data, $settings) {
    try {
        $preparedData = prepareFeatures($data);

        if (empty($preparedData['features'])) {
            return false;
        }

        $features = $preparedData['features'];
        $targets = $preparedData['targets'];
        $dates = $preparedData['dates'];

        // Chronological split: the first $trainSize rows train, the rest test.
        $numRows = count($features);
        $ratio = isset($settings['train_test_ratio']) ? (float) $settings['train_test_ratio'] : 0.0;
        $trainSize = (int) ($numRows * $ratio);

        // Both parts must be non-empty: training needs samples and the metrics
        // divide by the number of test rows.
        if ($trainSize < 1 || $trainSize >= $numRows) {
            error_log('Random Forest Error: train/test split leaves an empty training or testing set');
            return false;
        }

        $trainFeatures = array_slice($features, 0, $trainSize);
        $testFeatures = array_slice($features, $trainSize);
        $trainTargets = array_slice($targets, 0, $trainSize);
        $testTargets = array_slice($targets, $trainSize);
        $testDates = array_slice($dates, $trainSize);

        // Cap the number of trees for performance, but always build at least one.
        $nEstimators = max(1, min($settings['n_estimators'] ?? 100, 20));
        $model = new RandomForestRegressor(
            $nEstimators,
            $settings['max_depth'] ?? null,
            $settings['min_samples_split'] ?? 2,
            $settings['min_samples_leaf'] ?? 1
        );

        $model->train($trainFeatures, $trainTargets);
        $predictions = $model->predict($testFeatures);

        $mae = meanAbsoluteError($testTargets, $predictions);
        $rmse = sqrt(meanSquaredError($testTargets, $predictions));

        $result = [
            'predictions' => [],
            'metrics' => [
                'mae' => $mae,
                'rmse' => $rmse
            ]
        ];

        foreach ($testDates as $i => $date) {
            $result['predictions'][] = [
                'date' => $date,
                'actual_price' => $testTargets[$i],
                'predicted_price' => $predictions[$i]
            ];
        }

        return $result;
    } catch (Throwable $e) {
        // Throwable (not just Exception) so engine errors such as TypeError or
        // DivisionByZeroError are also logged and reported as a failure.
        error_log('Random Forest Error: ' . $e->getMessage());
        return false;
    }
}