# FARM/test_slice.py

# Save as test_slice.py
import time, os, re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

BASE_URL = "https://www.gardenate.com"
ZONE_PARAM = "USA+-+Zone+8b"
MARKER = '<div class="plant-list">'

def make_driver():
    """Create a headless Chrome WebDriver.

    The first chromedriver binary that exists among a few fixed filesystem
    locations is passed to Chrome through an explicit ``Service``. If none
    of them exists, Chrome is constructed without an explicit ``Service``.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    chrome_opts = Options()
    for flag in (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
    ):
        chrome_opts.add_argument(flag)

    # Candidate chromedriver locations, checked in this order.
    candidate_paths = (
        "/usr/bin/chromedriver",
        "/usr/lib/chromium-browser/chromedriver",
        "/usr/lib/chromium/chromedriver",
    )
    driver_path = next(
        (path for path in candidate_paths if os.path.exists(path)),
        None,
    )
    if driver_path is None:
        return webdriver.Chrome(options=chrome_opts)
    return webdriver.Chrome(service=Service(driver_path), options=chrome_opts)

# Script body: for a few sample months, load the zone page, count how many
# times MARKER occurs in the rendered HTML, and print the unique plant slugs
# linked between the first MARKER occurrence and the second one.

# Compiled once so the pattern is not looked up again for every month.
_PLANT_LINK_RE = re.compile(r'href="/plant/([^/"]+)"')


def _first_section_plants(html, marker):
    """Return the unique plant slugs linked in the first *marker* section.

    The section runs from the first occurrence of *marker* up to (but not
    including) the second occurrence, or to the end of *html* when there is
    only one occurrence.

    Args:
        html: Page source to search.
        marker: Literal text that opens each section.

    Returns:
        Slugs captured from ``href="/plant/<slug>"`` links, in first-seen
        order with duplicates removed. Empty when *marker* is absent.
    """
    start = html.find(marker)
    if start == -1:
        # No section at all: say so explicitly instead of slicing from -1.
        return []
    end = html.find(marker, start + len(marker))
    section = html[start:] if end == -1 else html[start:end]
    # dict.fromkeys keeps first-seen order while dropping repeats (the
    # original note says each plant is linked twice: thumbnail + name).
    return list(dict.fromkeys(_PLANT_LINK_RE.findall(section)))


driver = make_driver()
try:
    for month_num in [1, 4, 7]:
        url = f"{BASE_URL}/?zone={ZONE_PARAM}&m={month_num}"
        print(f"\n=== Month {month_num} ===")
        driver.get(url)
        # Fixed pause before reading the page source, so the page has time
        # to finish rendering.
        time.sleep(4)
        html = driver.page_source

        # Count plant-list occurrences
        count = html.count(MARKER)
        print(f"  plant-list divs found: {count}")

        # Plant links in the first section only, deduplicated
        unique = _first_section_plants(html, MARKER)
        print(f"  Plant links in first section: {unique}")
finally:
    # Always shut the browser down, even if a page load or parse step fails.
    driver.quit()