#!/usr/bin/env node
/**
 * Scrape 2020 Ghana Presidential results from 3 News (elections.3news.com).
 * Uses Playwright for browser automation so JS-rendered content is captured.
 *
 * Usage:
 *   npx playwright install chromium   # first time only
 *   node scripts/scrape-3news-2020.mjs
 *
 * Output: writes storage/app/3news_2020_presidential.json (for Laravel import).
 */

import { chromium } from 'playwright';
import { writeFileSync, mkdirSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

// ES modules have no built-in __dirname, so derive it from this module's URL.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptFile);

// Parent of the directory that contains this script.
const BASE = join(__dirname, '..');

// Destination of the scraped JSON payload.
const OUT_PATH = join(BASE, 'storage', 'app', '3news_2020_presidential.json');

// Page to scrape. NOTE: this binding shadows the global `URL` class inside this module.
const URL = 'https://elections.3news.com/results/2020/presidential';

/**
 * Read the text of every <table> element in the page's document.
 *
 * The callback runs via `page.evaluate`, so it only touches `document` and
 * values it creates itself.
 *
 * Each `<tr>` becomes one array holding the trimmed `innerText` of its
 * `td`/`th` cells. Cells whose trimmed text is empty are dropped, so a cell's
 * position in the array is NOT necessarily its column position in the table.
 * Rows that end up with no cells are kept as empty arrays; tables with no
 * `<tr>` at all are omitted.
 *
 * @param {object} page - Object exposing `evaluate(fn)` (a Playwright page in this script).
 * @returns {Promise<Array<{tableIndex: number, rows: string[][]}>>} One entry per
 *   table that has rows; `tableIndex` is the table's position among all tables
 *   in the document, including omitted ones.
 */
async function extractTables(page) {
  return page.evaluate(() => {
    const collected = [];
    let tableIndex = 0;
    for (const table of document.querySelectorAll('table')) {
      const rows = [];
      for (const tr of table.querySelectorAll('tr')) {
        const texts = [];
        for (const cell of tr.querySelectorAll('td, th')) {
          const text = cell.innerText.trim();
          if (text) {
            texts.push(text);
          }
        }
        rows.push(texts);
      }
      if (rows.length > 0) {
        collected.push({ tableIndex, rows });
      }
      tableIndex += 1;
    }
    return collected;
  });
}

/**
 * Scrape the results page at `URL` and write the extracted data to `OUT_PATH`.
 *
 * Steps: launch headless Chromium, load the page, wait a fixed delay, extract
 * every table with `extractTables`, derive the summary fields by table
 * position, write the JSON payload, and log the counts. The browser is closed
 * in all cases.
 *
 * Table positions assumed here (layout is not verifiable from this script —
 * confirm against the live page if the output looks wrong):
 *   - table 0: first cell of each row is used as a region name
 *   - table 1: first cell of each row is used as a constituency name
 *   - table 2: header row, then candidate / won / gained / lost / net
 *   - tables 3..18: one table per region, in `regionOrder` order
 *
 * @returns {Promise<void>}
 * @throws {Error} If no tables were extracted. Nothing is written in that
 *   case, so an existing output file is not replaced by an empty payload.
 *   Errors from Playwright and the filesystem calls propagate unchanged.
 */
async function main() {
  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    await page.goto(URL, { waitUntil: 'domcontentloaded', timeout: 30000 });
    // Fixed settle delay after DOMContentLoaded, before the tables are read.
    await new Promise((resolve) => setTimeout(resolve, 4000));

    const tables = await extractTables(page);
    if (tables.length === 0) {
      // Fail loudly instead of overwriting the output with an empty payload.
      throw new Error(`No tables found at ${URL}; not writing ${OUT_PATH}`);
    }

    // Non-numeric or missing cells count as 0.
    const toInt = (text) => Number.parseInt(text, 10) || 0;

    const regions = (tables[0]?.rows || []).map((r) => r[0]).filter(Boolean);
    const constituencies = (tables[1]?.rows || []).map((r) => r[0]).filter(Boolean);
    // slice(1) skips the first row of the table (treated as the header).
    const candidateWins = (tables[2]?.rows || []).slice(1).map((r) => ({
      candidate: r[0],
      won: toInt(r[1]),
      gained: toInt(r[2]),
      lost: toInt(r[3]),
      net: toInt(r[4]),
    }));

    const regionOrder = ['Ahafo', 'Ashanti', 'Bono', 'Bono East', 'Central', 'Eastern', 'Greater Accra', 'North East', 'Northern', 'Oti', 'Savannah', 'Upper East', 'Upper West', 'Volta', 'Western', 'Western North'];
    const regionalPct2020 = [];
    tables.slice(3, 19).forEach((t, i) => {
      // Second row, eighth non-empty cell is taken as the percentage.
      // NOTE(review): presumably the NPP 2020 share, per the output key — confirm.
      const row = t.rows?.[1];
      if (!row || row.length < 8) {
        return;
      }
      const pct = Number.parseFloat(row[7].replace('%', ''));
      regionalPct2020.push({
        region: regionOrder[i] || regions[i] || `Region_${i}`,
        // Unparseable text becomes null (JSON cannot represent NaN anyway).
        npp_pct_2020: Number.isFinite(pct) ? pct : null,
      });
    });

    const payload = {
      source_url: URL,
      scraped_at: new Date().toISOString(),
      election_year: 2020,
      election_type: 'presidential',
      regions,
      constituencies,
      candidate_constituency_wins: candidateWins,
      regional_npp_pct_2020: regionalPct2020,
      raw_tables: tables,
    };

    mkdirSync(dirname(OUT_PATH), { recursive: true });
    writeFileSync(OUT_PATH, JSON.stringify(payload, null, 2), 'utf8');
    console.log('Wrote:', OUT_PATH);
    console.log('Regions:', regions.length);
    console.log('Constituencies:', constituencies.length);
    console.log('Candidates:', candidateWins.length);
    console.log('Regional NPP % 2020:', regionalPct2020.length);
  } finally {
    if (browser) await browser.close();
  }
}

// Script entry point: run the scraper; on any failure print the error and
// exit with a non-zero status.
try {
  await main();
} catch (failure) {
  console.error(failure);
  process.exit(1);
}
