50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
# Save as test_slice.py
|
|
import time, os, re
|
|
from selenium import webdriver
|
|
from selenium.webdriver.chrome.options import Options
|
|
from selenium.webdriver.chrome.service import Service
|
|
|
|
# Root URL of the site whose pages are fetched below.
BASE_URL = "https://www.gardenate.com"
# Value substituted into the `zone` query parameter; appears to be already
# URL-encoded ("+" in place of spaces), so it is inserted into the URL verbatim.
ZONE_PARAM = "USA+-+Zone+8b"
# Opening tag searched for in the page source to locate plant-list sections.
MARKER = '<div class="plant-list">'
|
|
|
|
def make_driver():
    """Create a headless Chrome WebDriver.

    The first chromedriver binary that exists among a fixed list of system
    paths is used as the driver service. If none of those paths exists,
    ``webdriver.Chrome`` is constructed without an explicit service.

    Returns:
        A ``webdriver.Chrome`` instance configured with the flags below.
    """
    chrome_opts = Options()
    for flag in (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
    ):
        chrome_opts.add_argument(flag)

    candidate_paths = (
        "/usr/bin/chromedriver",
        "/usr/lib/chromium-browser/chromedriver",
        "/usr/lib/chromium/chromedriver",
    )
    # Earlier entries win: take the first candidate present on disk.
    driver_path = next(
        (path for path in candidate_paths if os.path.exists(path)),
        None,
    )
    if driver_path is None:
        return webdriver.Chrome(options=chrome_opts)
    return webdriver.Chrome(service=Service(driver_path), options=chrome_opts)
|
|
|
|
# Captures the slug of every "/plant/<slug>" link. Compiled once because the
# same pattern is applied to each month's page.
PLANT_LINK_RE = re.compile(r'href="/plant/([^/"]+)"')


def _first_section_links(html, marker):
    """Return the unique plant slugs linked inside the first *marker* section.

    The section runs from the first occurrence of *marker* up to (but not
    including) the second occurrence, or to the end of *html* when there is
    only one occurrence.

    Args:
        html: Page source to search.
        marker: Literal text that opens each section.

    Returns:
        Slugs in first-seen order with duplicates removed; an empty list when
        *marker* does not occur in *html*.
    """
    start = html.find(marker)
    if start == -1:
        # Without this guard the slice below would become html[-1:], which
        # only yields "no links" by accident.
        return []
    end = html.find(marker, start + len(marker))
    section = html[start:] if end == -1 else html[start:end]
    # Deduplicate (thumbnail + name link = 2 per plant), keeping order.
    return list(dict.fromkeys(PLANT_LINK_RE.findall(section)))


driver = make_driver()
try:
    for month_num in [1, 4, 7]:
        url = f"{BASE_URL}/?zone={ZONE_PARAM}&m={month_num}"
        print(f"\n=== Month {month_num} ===")
        driver.get(url)
        # Fixed pause before reading the DOM -- presumably to let the page
        # finish rendering; confirm whether an explicit wait would be better.
        time.sleep(4)
        html = driver.page_source

        # Count plant-list occurrences
        count = html.count(MARKER)
        print(f" plant-list divs found: {count}")

        # Collect plant links from the first plant-list section only
        unique = _first_section_links(html, MARKER)
        print(f" Plant links in first section: {unique}")
finally:
    # Always shut the browser down, even if a page load or parse step raised,
    # so no headless Chrome process is left behind.
    driver.quit()