#!/usr/bin/env node
/**
 * Scrape 2020 Ghana Presidential data from Peace FM Online (www.peacefmonline.com).
 * Uses Playwright so JS-rendered content is captured; same technique as 3 News.
 *
 * Usage:
 *   npx playwright install chromium   # first time only
 *   node scripts/scrape-peacefm-2020.mjs
 *
 * Output: storage/app/peacefm_2020_presidential.json (for Laravel import).
 */

import { chromium } from 'playwright';
import { writeFileSync, mkdirSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

// Paths are resolved from this script's own location (import.meta.url), not from the cwd.
const __dirname = dirname(fileURLToPath(import.meta.url));
const BASE = join(__dirname, '..');
const OUT_PATH = join(BASE, 'storage', 'app', 'peacefm_2020_presidential.json');

const BASE_URL = 'https://www.peacefmonline.com';

// Pages to visit, in order. Each entry is { key, url }; `key` names the page in the output.
const PAGES = [
  ['president', 'president'],
  ['swing', 'president/swing'],
  ['marginal', 'president/marginal'],
  ['map', 'president/map'],
  ['regional_analysis', 'analysis/president'],
  ['constituency_analysis', 'analysis/president/constituency'],
].map(([key, path]) => ({ key, url: `${BASE_URL}/pages/2020/${path}` }));

/**
 * Collect every table and the main text from the page that is currently loaded.
 *
 * @param {object} page - Object exposing an async `evaluate(fn)` (a Playwright page in `main`).
 * @returns {Promise<{tables: Array<{index: number, rows: string[][]}>, mainText: string}>}
 *   `tables` holds, for each `<table>` with at least one non-empty row, its position among
 *   all tables on the page and its rows of trimmed cell text; `mainText` is the inner text
 *   of `<main>`, else `<article>`, else `<body>`.
 */
async function extractPageData(page) {
  // Both callbacks read `document`, so they only make sense where page.evaluate runs them;
  // they are kept self-contained (no references to variables outside their own body).
  const collectTables = () =>
    Array.from(document.querySelectorAll('table'))
      .map((table, index) => {
        const rows = Array.from(table.querySelectorAll('tr'))
          .map((tr) => Array.from(tr.querySelectorAll('td, th'), (cell) => cell.innerText.trim()))
          // Drop rows whose cells are all empty.
          .filter((cells) => cells.some(Boolean));
        return { index, rows };
      })
      // `index` was assigned before this filter, so it still counts skipped tables.
      .filter(({ rows }) => rows.length > 0);

  const readMainText = () => {
    const root = document.querySelector('main') || document.querySelector('article') || document.body;
    return root ? root.innerText : '';
  };

  const tables = await page.evaluate(collectTables);
  const mainText = await page.evaluate(readMainText);
  return { tables, mainText };
}

/**
 * Extract the national NPP and NDC vote totals and percentages from page text.
 *
 * Two layouts are tried:
 *  1. A combined run "<pct>% <pct>% <n> votes <n> votes", read as NPP first, NDC second
 *     (e.g. "51.27% 47.39% 6,777,325 votes 6,264,517 votes").
 *  2. Otherwise, per party: the first "<n> votes <pct>" found after "NPP" / "NDC".
 *
 * @param {string} mainText - Text of the page.
 * @returns {{npp: ?{votes: number, percentage: number}, ndc: ?{votes: number, percentage: number}}}
 *   A party is `null` when nothing could be matched for it.
 */
function parseNationalFromText(mainText) {
  // "6,777,325" -> 6777325
  const toCount = (text) => parseInt(String(text).split(',').join(''), 10);
  const toResult = (votesText, pctText) => ({ votes: toCount(votesText), percentage: parseFloat(pctText) });

  const combined = mainText.match(/([\d.]+)\s*%\s*([\d.]+)\s*%\s*([\d,]+)\s*votes?\s*([\d,]+)\s*votes?/);
  if (combined) {
    const [, nppPct, ndcPct, nppVotes, ndcVotes] = combined;
    return {
      npp: toResult(nppVotes, nppPct),
      ndc: toResult(ndcVotes, ndcPct),
    };
  }

  // Fallback: look for each party independently.
  const partyPatterns = {
    npp: /NPP[\s\S]*?(\d{1,3}(?:,\d{3})*)\s*votes?\s*\(?([\d.]+)\s*%?\)?/i,
    ndc: /NDC[\s\S]*?(\d{1,3}(?:,\d{3})*)\s*votes?\s*\(?([\d.]+)\s*%?\)?/i,
  };
  const national = { npp: null, ndc: null };
  for (const [party, pattern] of Object.entries(partyPatterns)) {
    const found = pattern.exec(mainText);
    if (found) {
      national[party] = toResult(found[1], found[2]);
    }
  }
  return national;
}

// Region names by table position: parseRegionalVotesFromTables labels the table at
// position i with REGION_ORDER[i].
const REGION_ORDER = ['Ahafo', 'Ashanti', 'Bono', 'Bono East', 'Central', 'Eastern', 'Greater Accra', 'North East', 'Northern', 'Oti', 'Savannah', 'Upper East', 'Upper West', 'Volta', 'Western', 'Western North'];

/**
 * Build one NPP/NDC vote summary per result table.
 *
 * Each table is labelled with `REGION_ORDER[tableIndex]` (or `Region_<index>` past the end
 * of that list). The first row of every table is skipped as a header; the remaining rows
 * are read as `[candidate label, votes, percentage]`.
 *
 * Rows are attributed to a party by their lower-cased label. Surname/party markers
 * ("akufo", "npp", "mahama", "ndc") are authoritative. First names ("nana", "john") are
 * only a fallback: they never replace figures taken from an authoritative row, because a
 * first name can also occur in another candidate's label (e.g. "Nana Konadu
 * Agyeman-Rawlings"), which previously overwrote the real NPP figures.
 *
 * @param {Array<{rows: string[][]}>} tables - Tables of cell text.
 * @returns {Array<{region: string, npp_votes: number, npp_pct: number, ndc_votes: number, ndc_pct: number}>}
 *   One entry per table in which at least one of the two parties has a positive vote count.
 *   Unparseable vote or percentage cells count as 0.
 */
function parseRegionalVotesFromTables(tables) {
  // Map a lower-cased label to a party and say whether the match is authoritative.
  const classify = (label) => {
    if (label.includes('akufo') || label.includes('npp')) return { party: 'npp', strong: true };
    if (label.includes('mahama') || label.includes('ndc')) return { party: 'ndc', strong: true };
    if (label.includes('nana')) return { party: 'npp', strong: false };
    if (label.includes('john')) return { party: 'ndc', strong: false };
    return null;
  };

  const rows = [];
  tables.forEach((t, tableIndex) => {
    const regionName = REGION_ORDER[tableIndex] || `Region_${tableIndex}`;
    const tally = {
      npp: { votes: 0, pct: 0, strong: false },
      ndc: { votes: 0, pct: 0, strong: false },
    };

    // Row 0 is the header.
    for (let i = 1; i < t.rows.length; i++) {
      const row = t.rows[i];
      const match = classify((row[0] || '').toLowerCase());
      if (!match) continue;

      const slot = tally[match.party];
      // A first-name-only match must not clobber a surname/party match.
      if (slot.strong && !match.strong) continue;

      slot.votes = parseInt(String(row[1] || '').replace(/,/g, ''), 10) || 0;
      slot.pct = parseFloat(String(row[2] || '').replace('%', '')) || 0;
      slot.strong = match.strong;
    }

    if (tally.npp.votes > 0 || tally.ndc.votes > 0) {
      rows.push({
        region: regionName,
        npp_votes: tally.npp.votes,
        npp_pct: tally.npp.pct,
        ndc_votes: tally.ndc.votes,
        ndc_pct: tally.ndc.pct,
      });
    }
  });
  return rows;
}

/**
 * Flatten swing-constituency tables into a single numbered list.
 *
 * Row 0 of each table is treated as the header and used to locate columns by
 * lower-cased name: "constituency", "region", a 2016 column (header containing "2016",
 * or exactly "npp"/"ndc") and a 2020 column (header containing "2020"). When a column is
 * not found, positional fallbacks are used: 0 (constituency), 2 (region), 3 (2016 %) and
 * 5 (2020 %).
 *
 * The party cell is read from the column immediately after the corresponding percentage
 * column. This also holds for the fallback positions; previously a missing header made
 * the party resolve to column 0, i.e. the constituency name.
 *
 * Rows with an empty constituency, the literal "Constituency", or "---" are skipped.
 *
 * @param {Array<{rows: string[][]}>} tables - Tables of cell text.
 * @returns {Array<object>} Entries with keys `no` (1-based, running across all tables),
 *   `constituency`, `region`, `2016_majority_pct`, `2016_party`, `2020_party`,
 *   `2020_majority_pct`. Unparseable percentages become 0; missing party cells become ''.
 */
function parseSwingListFromTables(tables) {
  const toPct = (cell) => parseFloat(String(cell || '').replace('%', '')) || 0;

  const list = [];
  for (const t of tables) {
    const headers = (t.rows[0] || []).map((c) => c.toLowerCase());
    const constituencyIdx = headers.findIndex((h) => h.includes('constituency'));
    const regionIdx = headers.findIndex((h) => h.includes('region'));
    const col2016 = headers.findIndex((h) => h.includes('2016') || h === 'npp' || h === 'ndc');
    const col2020 = headers.findIndex((h) => h.includes('2020'));

    // Resolve each percentage column once so the party column is derived from the same
    // position (findIndex returns -1 when the header is absent).
    const pct2016Idx = col2016 >= 0 ? col2016 : 3;
    const pct2020Idx = col2020 >= 0 ? col2020 : 5;

    for (let i = 1; i < t.rows.length; i++) {
      const row = t.rows[i];
      const constituency = (row[constituencyIdx >= 0 ? constituencyIdx : 0] || '').trim();
      const region = (row[regionIdx >= 0 ? regionIdx : 2] || '').trim();
      if (!constituency || constituency === 'Constituency' || constituency === '---') continue;

      list.push({
        no: list.length + 1,
        constituency,
        region,
        '2016_majority_pct': toPct(row[pct2016Idx]),
        '2016_party': row[pct2016Idx + 1] || '',
        '2020_party': row[pct2020Idx + 1] || '',
        '2020_majority_pct': toPct(row[pct2020Idx]),
      });
    }
  }
  return list;
}

/**
 * Collect constituency / vote-difference pairs from the marginal-seat tables.
 *
 * A table is used only when its first row (lower-cased) mentions "constituency" or
 * "vote". That first row is skipped; for every other row, column 0 is the constituency
 * and the vote difference is column 1, or column 2 when column 1 is empty. Rows whose
 * constituency is empty or "---" are dropped.
 *
 * @param {Array<{rows: string[][]}>} tables - Tables of cell text.
 * @returns {Array<{constituency: string, vote_difference: string}>} Trimmed cell text.
 */
function parseMarginalFromTables(tables) {
  const isRelevant = (table) => {
    const headerText = (table.rows[0] || []).map((c) => c.toLowerCase()).join(' ');
    return headerText.includes('constituency') || headerText.includes('vote');
  };

  return tables.filter(isRelevant).flatMap((table) =>
    table.rows.slice(1).flatMap((cells) => {
      const constituency = (cells[0] || '').trim();
      const voteDiff = (cells[1] || cells[2] || '').trim();
      const isPlaceholder = !constituency || constituency === '---';
      return isPlaceholder ? [] : [{ constituency, vote_difference: voteDiff }];
    }),
  );
}

/**
 * Extract per-region 2016 vs 2020 figures for NDC and NPP.
 *
 * Every row of every table is inspected (header rows included). A row is kept when its
 * trimmed first cell is one of the sixteen region names and at least six cells follow it.
 * Those cells are read, in order, as NDC 2016, NDC 2020, NDC gain/loss, NPP 2016,
 * NPP 2020, NPP gain/loss; the first "%" and first "+" are removed before parsing, and
 * anything unparseable becomes 0.
 *
 * @param {Array<{rows: string[][]}>} tables - Tables of cell text.
 * @returns {Array<{region: string, ndc_2016: number, ndc_2020: number, ndc_gl: number,
 *   npp_2016: number, npp_2020: number, npp_gl: number}>}
 */
function parseRegionalComparisonFromTables(tables) {
  const regionNames = new Set(['Ahafo', 'Ashanti', 'Bono', 'Bono East', 'Central', 'Eastern', 'Greater Accra', 'North East', 'Northern', 'Oti', 'Savannah', 'Upper East', 'Upper West', 'Volta', 'Western', 'Western North']);
  const toNumber = (cell) => parseFloat(String(cell).replace('%', '').replace('+', '')) || 0;

  const comparison = [];
  tables.forEach(({ rows }) => {
    rows.forEach((cells) => {
      const region = (cells[0] || '').trim();
      if (!regionNames.has(region)) return;

      const figures = cells.slice(1).map(toNumber);
      if (figures.length < 6) return;

      const [ndc2016, ndc2020, ndcGl, npp2016, npp2020, nppGl] = figures;
      comparison.push({
        region,
        ndc_2016: ndc2016,
        ndc_2020: ndc2020,
        ndc_gl: ndcGl,
        npp_2016: npp2016,
        npp_2020: npp2020,
        npp_gl: nppGl,
      });
    });
  });
  return comparison;
}

/**
 * Visit every entry in PAGES with a headless Chromium, extract tables and text, run the
 * page-specific parsers, and write the combined result to OUT_PATH as pretty-printed JSON.
 *
 * A failure on one page is logged and recorded as `{ url, error }` under that page's key;
 * the remaining pages are still processed. The browser is closed even when launching or
 * page setup throws. A short summary of what was parsed is printed at the end.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const output = {
    source: 'www.peacefmonline.com',
    scraped_at: new Date().toISOString(),
    election_year: 2020,
    election_type: 'presidential',
    pages: {},
  };
  let browser;

  try {
    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    await page.setExtraHTTPHeaders({ 'Accept-Language': 'en-GB,en;q=0.9' });
    await page.setViewportSize({ width: 1280, height: 800 });

    for (const target of PAGES) {
      const { key, url } = target;
      console.log('Fetching:', url);
      try {
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 20000 });
        // Fixed 3 s pause after DOMContentLoaded before reading the page.
        await new Promise((resolve) => {
          setTimeout(resolve, 3000);
        });

        const scraped = await extractPageData(page);
        const { tables, mainText } = scraped;
        output.pages[key] = { url, tables, mainTextSnippet: mainText.slice(0, 3000) };

        // Keys without a case here ('map', 'constituency_analysis') are only stored raw.
        switch (key) {
          case 'president': {
            const national = parseNationalFromText(mainText);
            const regionalVotes = parseRegionalVotesFromTables(tables);
            output.national = { national, regional_votes: regionalVotes, raw_tables: tables };
            break;
          }
          case 'swing': {
            const swingList = parseSwingListFromTables(tables);
            // The first table is read as a per-region summary: region, total, constituencies.
            const [summaryTable] = tables;
            const byRegion = summaryTable
              ? summaryTable.rows
                .slice(1)
                .map(([region, total, constituencies]) => ({ region, total, constituencies }))
                .filter((entry) => entry.region && entry.region !== 'Region')
              : [];
            output.swing = { swing_list: swingList, by_region: byRegion, raw_tables: tables };
            break;
          }
          case 'marginal': {
            const marginalList = parseMarginalFromTables(tables);
            output.marginal = { marginal_list: marginalList, raw_tables: tables };
            break;
          }
          case 'regional_analysis': {
            const regionalComparison = parseRegionalComparisonFromTables(tables);
            output.regional_analysis = { regional_comparison: regionalComparison, raw_tables: tables };
            break;
          }
          default:
            break;
        }
      } catch (e) {
        console.warn('Error on', url, e.message);
        output.pages[key] = { url, error: e.message };
      }
    }
  } finally {
    if (browser) {
      await browser.close();
    }
  }

  mkdirSync(dirname(OUT_PATH), { recursive: true });
  writeFileSync(OUT_PATH, JSON.stringify(output, null, 2), 'utf8');
  console.log('Wrote:', OUT_PATH);

  const summary = [
    ['National NPP:', output.national?.national?.npp ? 'yes' : 'no'],
    ['National NDC:', output.national?.national?.ndc ? 'yes' : 'no'],
    ['Regional votes count:', output.national?.regional_votes?.length ?? 0],
    ['Swing list count:', output.swing?.swing_list?.length ?? 0],
    ['Marginal list count:', output.marginal?.marginal_list?.length ?? 0],
    ['Regional comparison count:', output.regional_analysis?.regional_comparison?.length ?? 0],
  ];
  for (const [label, value] of summary) {
    console.log(label, value);
  }
}

// Script entry point: run main(); any error that escapes it is printed and the process
// exits with status 1.
try {
  await main();
} catch (err) {
  console.error(err);
  process.exit(1);
}
