Files
FARM/test_slice.py
T
2026-06-25 21:29:21 +00:00

50 lines
1.6 KiB
Python

# Save as test_slice.py
import time, os, re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
# Root of the site; the per-month page URL is built from this below.
BASE_URL = "https://www.gardenate.com"
# Value inserted verbatim as the `zone` query parameter.
# NOTE(review): the "+" characters look like form-encoded spaces
# ("USA - Zone 8b") -- confirm against the site before changing.
ZONE_PARAM = "USA+-+Zone+8b"
# Literal substring searched for in the rendered page source to locate
# each plant-list section; the match is exact, attribute order included.
MARKER = '<div class="plant-list">'
def make_driver():
    """Build a headless Chrome WebDriver.

    The first chromedriver binary found among a few fixed system paths is
    used explicitly; if none of them exists, Selenium is left to locate a
    driver on its own.

    Returns:
        A ``webdriver.Chrome`` instance configured with the headless flags.
    """
    chrome_flags = (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
    )
    candidate_paths = (
        "/usr/bin/chromedriver",
        "/usr/lib/chromium-browser/chromedriver",
        "/usr/lib/chromium/chromedriver",
    )

    opts = Options()
    for flag in chrome_flags:
        opts.add_argument(flag)

    # Paths are probed in order and probing stops at the first hit.
    driver_path = next(
        (path for path in candidate_paths if os.path.exists(path)),
        None,
    )
    if driver_path is None:
        return webdriver.Chrome(options=opts)
    return webdriver.Chrome(service=Service(driver_path), options=opts)
def _first_section_links(html):
    """Return the unique plant slugs linked from the first plant-list section.

    The section runs from the first occurrence of ``MARKER`` up to (but not
    including) the second occurrence, or to the end of the document when
    there is only one.

    Args:
        html: Full page source as a string.

    Returns:
        Slugs captured from ``href="/plant/<slug>"`` links, in first-seen
        order with duplicates removed. Empty when ``MARKER`` is absent.
    """
    start = html.find(MARKER)
    if start == -1:
        # No plant-list section on the page: report nothing rather than
        # slicing from index -1 and scanning a meaningless fragment.
        return []
    end = html.find(MARKER, start + len(MARKER))
    section = html[start:end] if end != -1 else html[start:]
    slugs = re.findall(r'href="/plant/([^/"]+)"', section)
    # Deduplicate (thumbnail + name link = 2 per plant), keeping order.
    return list(dict.fromkeys(slugs))


driver = make_driver()
try:
    for month_num in [1, 4, 7]:
        url = f"{BASE_URL}/?zone={ZONE_PARAM}&m={month_num}"
        print(f"\n=== Month {month_num} ===")
        driver.get(url)
        # Fixed pause after navigation before reading the page source
        # (presumably to let client-side rendering finish -- confirm).
        time.sleep(4)
        html = driver.page_source
        # Count plant-list occurrences
        count = html.count(MARKER)
        print(f" plant-list divs found: {count}")
        # Plant links in the first section only
        unique = _first_section_links(html)
        print(f" Plant links in first section: {unique}")
finally:
    # Always shut the browser down, even if a page load or parse fails,
    # so no headless Chrome process is left behind.
    driver.quit()