#!/usr/bin/python3
"""
update-goons-status.py

Fetch "Last Seen" blocks from goon-tracker.com (PvP + PvE) and write a local JSON.

Pages & markers (as of today):
- PvP page: https://www.goon-tracker.com/
  Shows "Last Seen: " and a "Time:" line. (wording: "Last Seen:")
- PvE page: https://www.goon-tracker.com/pvetracker
  Shows "Last Seen on PvE Mode:" then the map + "Time:" + "Last seen:".

Reference: https://github.com/the-hideout/tarkov-api/issues/133
"""

import contextlib
import json
import os
import re
import sys
from datetime import datetime, timezone
from typing import Iterable, Iterator, List, Optional, Tuple

# NOTE: the third-party packages (requests, bs4) are imported inside the
# functions that use them.  A missing dependency then surfaces through the
# ``__main__`` handler below (one-line "Error: ..." message, exit code 1)
# instead of an import-time traceback.

UA = "EFTC-CompanionBot/1.0 (+yourdomain; admin@yourdomain)"
TIMEOUT = 15
OUT = "/var/www/EFT_COMPANION/public_html/goons-status.json"

PVP_URL = "https://www.goon-tracker.com/"
PVE_URL = "https://www.goon-tracker.com/pvetracker"

# Header text that introduces the PvE "last seen" block.
PVE_MARKER = "Last Seen on PvE Mode"

# Canonical map name -> spellings accepted in page text (matched
# case-insensitively).  Multi-word names are listed whole so that fragments
# such as "The" or "Ground" are never reported as a map.
_MAP_SPELLINGS = {
    "Customs": ("Customs",),
    "Shoreline": ("Shoreline",),
    "Lighthouse": ("Lighthouse",),
    "Woods": ("Woods",),
    "Streets": ("Streets of Tarkov", "Streets"),
    "Reserve": ("Reserve",),
    "Interchange": ("Interchange",),
    "Factory": ("Factory",),
    "Labs": ("The Lab", "TheLab", "Labs", "Lab"),
    "Ground Zero": ("Ground Zero", "GroundZero"),
}

# Common EFT map names (canonical spellings only).
KNOWN_MAPS = set(_MAP_SPELLINGS)

_ALIAS_TO_MAP = {
    alias.lower(): name
    for name, aliases in _MAP_SPELLINGS.items()
    for alias in aliases
}

# Longest spelling first so "Streets of Tarkov" wins over "Streets".
_MAP_RE = re.compile(
    r"\b("
    + "|".join(re.escape(a) for a in sorted(_ALIAS_TO_MAP, key=len, reverse=True))
    + r")\b",
    flags=re.I,
)

# "Last Seen: Lighthouse" -> captures the words following the marker.
_PVP_MARKER_RE = re.compile(
    r"Last\s*Seen\s*:?\s*([A-Za-z][A-Za-z ]{2,30})", flags=re.I
)


def fetch_html(url: str) -> str:
    """Download *url* and return the response body as text.

    Raises whatever ``requests`` raises on connection problems, timeouts
    (``TIMEOUT`` seconds) or a non-2xx status (``raise_for_status``).
    """
    import requests

    r = requests.get(url, headers={"User-Agent": UA}, timeout=TIMEOUT)
    r.raise_for_status()
    return r.text


def make_soup(html: str):
    """Parse *html* with BeautifulSoup's built-in ``html.parser``."""
    from bs4 import BeautifulSoup

    return BeautifulSoup(html, "html.parser")


def text_nodes(soup) -> Iterator[str]:
    """Yield every non-blank text node of *soup*, stripped, in document order."""
    for el in soup.find_all(string=True):
        txt = (el or "").strip()
        if txt:
            yield txt


def clean_token(s: str) -> str:
    """Collapse runs of whitespace in *s* to single spaces and strip the ends."""
    return re.sub(r"\s+", " ", s).strip()


def match_known_map(text: str) -> Optional[str]:
    """Return the canonical name of the first known map mentioned in *text*.

    Matching ignores case, treats hyphens as spaces and only accepts whole
    words.  Returns ``None`` when no known map is mentioned.
    """
    normalized = clean_token(text.replace("-", " "))
    m = _MAP_RE.search(normalized)
    return _ALIAS_TO_MAP[m.group(1).lower()] if m else None


def parse_time_and_ago(all_text: Iterable[str]) -> Tuple[str, str]:
    """Extract the "Time:" and "Last seen:" values from the page's text nodes.

    Both prefixes are matched case-sensitively.  If a prefix occurs several
    times, the last occurrence wins.  A missing value is returned as "".
    """
    time_text, last_seen_text = "", ""
    for t in all_text:
        if t.startswith("Time:"):
            time_text = t.split("Time:", 1)[-1].strip()
        elif t.startswith("Last seen:"):
            last_seen_text = t.split("Last seen:", 1)[-1].strip()
    return time_text, last_seen_text


def find_pvp_map(all_text: List[str]) -> Optional[str]:
    """Locate the map named by the first "Last Seen" text node.

    Only the first node that starts with "last seen" (any case) is
    considered:
    1) If words follow the marker on the same node, return the known map
       among them, or else the first of those words (covers maps that are
       not in the known list).
    2) Otherwise look for a known map in that node plus the next five.
    Returns ``None`` when there is no marker or nothing could be extracted.
    """
    for idx, t in enumerate(all_text):
        if not t.lower().startswith("last seen"):
            continue
        m = _PVP_MARKER_RE.search(t)
        if m:
            candidate = clean_token(m.group(1))
            # The capture starts with a letter, so split() is never empty.
            return match_known_map(candidate) or candidate.split()[0]
        window = " ".join(all_text[idx: idx + 6])
        return match_known_map(window)
    return None


def parse_pvp(html: str):
    """
    Strategy (PvP):
    1) Find a text line that starts with "Last Seen".
    2) Try to capture the map on the same line or the next nearby text node(s).
    3) Also collect "Time:" and "Last seen:" if present.

    The PvP homepage wording is "Last Seen: " (without 'PvE').

    Returns a dict with keys "map" (str or None), "timeText",
    "lastSeenText", "source" and "debug" (non-empty only when no map was
    found).
    """
    soup = make_soup(html)
    all_text = list(text_nodes(soup))

    last_seen_map = find_pvp_map(all_text)

    # Times and "last seen:" (ago)
    time_text, last_seen_text = parse_time_and_ago(all_text)

    debug = ""
    if not last_seen_map:
        # Provide a small debug snippet for troubleshooting
        sample = [t for t in all_text if t.lower().startswith("last seen")][:2]
        debug = f"pvp_debug: marker_lines={sample}"

    return {
        "map": last_seen_map,
        "timeText": time_text,
        "lastSeenText": last_seen_text,
        "source": PVP_URL,
        "debug": debug,
    }


def pick_pve_map(marker_text: str, sibling_chunks: Iterable[str]) -> Optional[str]:
    """Choose the PvE map from the marker node and the text chunks after it.

    A known map written on the marker node itself (after the marker phrase)
    wins.  Otherwise the first chunk that yields anything is used: a known
    map if it mentions one, else its first word.  Chunks consisting only of
    hyphens/whitespace are skipped rather than indexed into.
    """
    _, _, tail = marker_text.partition(PVE_MARKER)
    known = match_known_map(tail)
    if known:
        return known
    for chunk in sibling_chunks:
        known = match_known_map(chunk)
        if known:
            return known
        words = chunk.replace("-", " ").split()
        if words:
            return words[0]
    return None


def parse_pve(html: str):
    """
    Strategy (PvE):
    - Find the text "Last Seen on PvE Mode:" then read the nearby
      span/inline with the map token.
    - Also capture "Time:" and "Last seen:" lines if present.

    The PvE page explicitly uses the "Last Seen on PvE Mode" header.

    Returns a dict with the same keys as parse_pvp().
    """
    soup = make_soup(html)
    all_text = list(text_nodes(soup))

    last_seen_map = None
    marker = soup.find(string=lambda s: isinstance(s, str) and PVE_MARKER in s)
    if marker:
        # Collect the text of the elements/strings that follow the marker's
        # parent; the first non-empty chunk is usually the map.
        siblings_text = []
        for sib in marker.parent.next_siblings:
            if hasattr(sib, "get_text"):
                txt = sib.get_text(strip=True)
            else:
                txt = str(sib).strip()
            if txt:
                siblings_text.append(txt)
            if len(siblings_text) > 5:
                break
        last_seen_map = pick_pve_map(str(marker), siblings_text)

    time_text, last_seen_text = parse_time_and_ago(all_text)

    debug = ""
    if not last_seen_map:
        sample = [t for t in all_text if "Last Seen on PvE Mode" in t][:2]
        debug = f"pve_debug: marker_lines={sample}"

    return {
        "map": last_seen_map,
        "timeText": time_text,
        "lastSeenText": last_seen_text,
        "source": PVE_URL,
        "debug": debug,
    }


def main():
    """Fetch both tracker pages, parse them and atomically write ``OUT``."""
    pvp_html = fetch_html(PVP_URL)
    pve_html = fetch_html(PVE_URL)

    pvp = parse_pvp(pvp_html)
    pve = parse_pve(pve_html)

    now_iso = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
    data = {
        "fetchedAt": now_iso,
        "pvp": pvp,
        "pve": pve,
    }

    # Write to a sibling temp file and rename it over OUT so readers never
    # see a half-written JSON document.
    tmp = OUT + ".tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, separators=(",", ":"))
        os.replace(tmp, OUT)
    except Exception:
        # Do not leave a stale temp file behind when the write/rename fails.
        with contextlib.suppress(OSError):
            os.remove(tmp)
        raise

    print(f"Wrote {OUT}: PvP={pvp.get('map')} PvE={pve.get('map')} at {now_iso}")


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)