# EFTCOMPANION/update-goons-status.py

#!/usr/bin/python3
"""
update-goons-status.py
Fetch "Last Seen" blocks from goon-tracker.com (PvP + PvE) and write a local JSON.

Pages & markers (as observed when this script was written):
- PvP page: https://www.goon-tracker.com/
  Shows "Last Seen: <Map>" and a "Time:" line (note the capital "S" in "Seen").
- PvE page: https://www.goon-tracker.com/pvetracker
  Shows "Last Seen on PvE Mode:", then the map, a "Time:" line and a "Last seen:" line.
  Reference: https://github.com/the-hideout/tarkov-api/issues/133
"""
import json
import os
import re
import sys
from datetime import datetime, timezone

import requests
from bs4 import BeautifulSoup

# User-Agent header sent with every request.
# NOTE(review): the contact details look like placeholders ("yourdomain") - confirm.
UA = "EFTC-CompanionBot/1.0 (+yourdomain; admin@yourdomain)"
# Passed as the `timeout` argument of requests.get (seconds).
TIMEOUT = 15
# Destination of the generated JSON document.
OUT = "/var/www/EFT_COMPANION/public_html/goons-status.json"

# Pages that are scraped.
PVP_URL = "https://www.goon-tracker.com/"
PVE_URL = "https://www.goon-tracker.com/pvetracker"

# Single-word tokens accepted as a map name when a parsed chunk is split on
# whitespace (the parsers fall back to the first word when nothing matches).
KNOWN_MAPS = {
    "Customs",
    "Shoreline",
    "Lighthouse",
    "Woods",
    "Streets",
    "Reserve",
    "Interchange",
    "Factory",
    "Labs",
    "Lab",
    "TheLab",
    "The",
    "Ground",
    "GroundZero",
}

def fetch_html(url: str) -> str:
    """Download *url* and return the decoded response body.

    The request carries the module's User-Agent and timeout settings.
    Raises whatever ``requests`` raises for transport failures, and an
    HTTP error (via ``raise_for_status``) for 4xx/5xx responses.
    """
    request_headers = {"User-Agent": UA}
    response = requests.get(url, headers=request_headers, timeout=TIMEOUT)
    # Fail loudly on error pages instead of trying to parse them.
    response.raise_for_status()
    return response.text

def text_nodes(soup):
    """Yield every non-blank text node of *soup*, stripped of surrounding whitespace.

    Nodes come from ``soup.find_all(string=True)`` in the order that call
    returns them; falsy nodes and nodes that are empty after stripping
    are skipped.
    """
    stripped = ((node or "").strip() for node in soup.find_all(string=True))
    # filter(None, ...) drops the empty strings left over after stripping.
    yield from filter(None, stripped)

def clean_token(s: str) -> str:
    """Return *s* with each whitespace run collapsed to one space and the ends trimmed."""
    # Splitting on whitespace and re-joining collapses runs and trims in one step.
    pieces = s.split()
    return " ".join(pieces)

def parse_time_and_ago(all_text):
    """Extract the "Time:" and "Last seen:" values from a sequence of text lines.

    Both prefixes are matched case-sensitively at the start of a line.
    When a prefix occurs more than once the last occurrence wins.
    Returns ``(time_text, last_seen_text)``; a value that was never seen
    is the empty string.
    """
    found = {"Time:": "", "Last seen:": ""}
    for line in all_text:
        for prefix in found:
            if line.startswith(prefix):
                # Everything after the prefix, without surrounding whitespace.
                found[prefix] = line[len(prefix):].strip()
                break
    return found["Time:"], found["Last seen:"]

def parse_pvp(html: str):
    """
    Parse the PvP page and return the last-seen status as a dict.

    Strategy (PvP):
      1) Walk the text nodes looking for one that starts with "Last Seen"
         (case-insensitive).
      2) Try to capture the map on that same line ("Last Seen: <Map>");
         otherwise look for a well-known map name in a small window of the
         following text nodes.
      3) If a marker yields no map, keep going with the next marker instead
         of giving up. The case-insensitive test also matches the
         "Last seen: <ago>" line, which may come before the real marker.
      4) Also collect "Time:" and "Last seen:" if present.

    Returns a dict with the keys "map" (str or None), "timeText",
    "lastSeenText", "source" and "debug" (non-empty only when no map was found).
    """
    soup = BeautifulSoup(html, "html.parser")
    all_text = list(text_nodes(soup))
    last_seen_map = None

    for idx, t in enumerate(all_text):
        if not t.lower().startswith("last seen"):
            continue

        # Same-line pattern, e.g. "Last Seen: Lighthouse".
        m = re.search(r"Last\s*Seen\s*:?\s*([A-Za-z][A-Za-z ]{2,30})", t, flags=re.I)
        if m:
            candidate = clean_token(m.group(1))
            # The capture starts with a letter, so there is at least one token.
            tokens = candidate.replace("-", " ").split()
            # Prefer a known map token; otherwise keep the first word
            # (often it is already just 'Customs', etc.).
            last_seen_map = next(
                (tok for tok in tokens if tok in KNOWN_MAPS),
                candidate.split()[0],
            )
        else:
            # Not on the same line: scan a small window ahead.
            window = " ".join(all_text[idx: idx + 6])
            m2 = re.search(
                r"\b(Customs|Shoreline|Lighthouse|Woods|Streets|Reserve|Interchange|Factory|Labs)\b",
                window,
                flags=re.I,
            )
            if m2:
                last_seen_map = clean_token(m2.group(1))

        if last_seen_map:
            break  # Stop at the first marker that actually produced a map

    # Times and "last seen:" (ago)
    time_text, last_seen_text = parse_time_and_ago(all_text)

    debug = ""
    if not last_seen_map:
        # Provide a small debug snippet for troubleshooting
        sample = [t for t in all_text if t.lower().startswith("last seen")][:2]
        debug = f"pvp_debug: marker_lines={sample}"

    return {
        "map": last_seen_map,
        "timeText": time_text,
        "lastSeenText": last_seen_text,
        "source": PVP_URL,
        "debug": debug
    }

def parse_pve(html: str):
    """
    Parse the PvE page and return the last-seen status as a dict.

    Strategy (PvE):
      1) Find the text node containing "Last Seen on PvE Mode" and read the
         first non-empty sibling that follows its parent element; pick a
         known map token from it, or its first word.
      2) If that yields nothing (no usable sibling, or the chunk holds no
         words), fall back to scanning a small window of text nodes after
         the marker for a well-known map name.
      3) Also capture "Time:" and "Last seen:" lines if present.

    Returns a dict with the keys "map" (str or None), "timeText",
    "lastSeenText", "source" and "debug" (non-empty only when no map was found).
    """
    marker_phrase = "Last Seen on PvE Mode"
    soup = BeautifulSoup(html, "html.parser")
    all_text = list(text_nodes(soup))
    last_seen_map = None

    marker = soup.find(string=lambda s: isinstance(s, str) and marker_phrase in s)
    if marker:
        # Only the first non-empty sibling chunk is ever inspected.
        first_chunk = ""
        for sib in marker.parent.next_siblings:
            if hasattr(sib, "get_text"):
                txt = sib.get_text(strip=True)
            else:
                txt = str(sib).strip()
            if txt:
                first_chunk = txt
                break

        # First sibling chunk is often the map (e.g., "Lighthouse").
        # A chunk made only of hyphens yields no words; guard against that
        # instead of indexing an empty list.
        words = first_chunk.replace("-", " ").split()
        if words:
            last_seen_map = next((w for w in words if w in KNOWN_MAPS), words[0])

    if not last_seen_map:
        # Fallback: look for a well-known map name in the text that follows
        # the marker phrase.
        for idx, t in enumerate(all_text):
            if marker_phrase not in t:
                continue
            window = " ".join(all_text[idx: idx + 6]).split(marker_phrase, 1)[-1]
            m = re.search(
                r"\b(Customs|Shoreline|Lighthouse|Woods|Streets|Reserve|Interchange|Factory|Labs)\b",
                window,
                flags=re.I,
            )
            if m:
                last_seen_map = clean_token(m.group(1))
                break

    time_text, last_seen_text = parse_time_and_ago(all_text)

    debug = ""
    if not last_seen_map:
        sample = [t for t in all_text if "Last Seen on PvE Mode" in t][:2]
        debug = f"pve_debug: marker_lines={sample}"

    return {
        "map": last_seen_map,
        "timeText": time_text,
        "lastSeenText": last_seen_text,
        "source": PVE_URL,
        "debug": debug
    }

def main():
    """Fetch both tracker pages, parse them and publish the result as JSON.

    The document is first written to ``OUT + ".tmp"`` and then moved over
    ``OUT`` with ``os.replace``, so the destination is only ever swapped
    for a completely written file. Any exception from fetching, parsing
    or writing propagates to the caller.
    """
    # Both downloads happen before any parsing, PvP first.
    pvp_page, pve_page = (fetch_html(page_url) for page_url in (PVP_URL, PVE_URL))

    pvp_status = parse_pvp(pvp_page)
    pve_status = parse_pve(pve_page)

    # UTC timestamp with second precision.
    stamp = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
    snapshot = dict(fetchedAt=stamp, pvp=pvp_status, pve=pve_status)

    staging_path = f"{OUT}.tmp"
    with open(staging_path, "w", encoding="utf-8") as handle:
        # Compact separators keep the published file small.
        json.dump(snapshot, handle, ensure_ascii=False, separators=(",", ":"))
    os.replace(staging_path, OUT)

    print(f"Wrote {OUT}: PvP={pvp_status.get('map')} PvE={pve_status.get('map')} at {stamp}")

if __name__ == "__main__":
    # Script entry point: report any failure on stderr and exit with status 1.
    try:
        main()
    except Exception as exc:
        sys.stderr.write(f"Error: {exc}\n")
        raise SystemExit(1)