# Save as test_slice.py import time, os, re from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service BASE_URL = "https://www.gardenate.com" ZONE_PARAM = "USA+-+Zone+8b" MARKER = '
' def make_driver(): options = Options() options.add_argument("--headless") options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") options.add_argument("--disable-gpu") options.add_argument("--window-size=1920,1080") for p in ["/usr/bin/chromedriver","/usr/lib/chromium-browser/chromedriver","/usr/lib/chromium/chromedriver"]: if os.path.exists(p): return webdriver.Chrome(service=Service(p), options=options) return webdriver.Chrome(options=options) driver = make_driver() for month_num in [1, 4, 7]: url = f"{BASE_URL}/?zone={ZONE_PARAM}&m={month_num}" print(f"\n=== Month {month_num} ===") driver.get(url) time.sleep(4) html = driver.page_source # Count plant-list occurrences count = html.count(MARKER) print(f" plant-list divs found: {count}") # Slice to first only first = html.find(MARKER) second = html.find(MARKER, first + len(MARKER)) if second != -1: chunk = html[first:second] else: chunk = html[first:] # Count plant links in slice links = re.findall(r'href="/plant/([^/"]+)"', chunk) # Deduplicate (thumbnail + name link = 2 per plant) unique = list(dict.fromkeys(links)) print(f" Plant links in first section: {unique}") driver.quit()