initial commit
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
# Save as test_slice.py
|
||||
import time, os, re
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
|
||||
# Site root; the per-month page URLs requested below are built from this.
BASE_URL = "https://www.gardenate.com"
|
||||
# Value sent as the `zone` query parameter. It looks pre-encoded for use in a
# query string ("+" standing in for spaces) -- NOTE(review): confirm.
ZONE_PARAM = "USA+-+Zone+8b"
|
||||
# Literal HTML fragment that is counted and searched for in each page source.
MARKER = '<div class="plant-list">'
|
||||
|
||||
def make_driver():
    """Create a headless Chrome WebDriver.

    A chromedriver binary found at one of the known system locations is used
    explicitly; if none of those paths exists, Selenium is left to locate a
    driver on its own.

    Returns:
        A ``webdriver.Chrome`` instance configured with the headless flags
        listed below.
    """
    chrome_opts = Options()
    for flag in (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--window-size=1920,1080",
    ):
        chrome_opts.add_argument(flag)

    # Candidate chromedriver locations, checked in order; first hit wins.
    candidates = (
        "/usr/bin/chromedriver",
        "/usr/lib/chromium-browser/chromedriver",
        "/usr/lib/chromium/chromedriver",
    )
    driver_path = next(
        (path for path in candidates if os.path.exists(path)),
        None,
    )

    if driver_path is None:
        return webdriver.Chrome(options=chrome_opts)
    return webdriver.Chrome(service=Service(driver_path), options=chrome_opts)
|
||||
|
||||
# Matches plant detail links and captures the plant slug. Compiled once
# because the same pattern is applied to every fetched page.
_PLANT_LINK_RE = re.compile(r'href="/plant/([^/"]+)"')


def _first_section(html, marker):
    """Return the part of *html* from the first *marker* up to the second.

    Args:
        html: Full page source.
        marker: Literal substring that opens each section.

    Returns:
        The text starting at the first occurrence of *marker* and ending just
        before the second occurrence (or at the end of *html* when there is
        only one). ``None`` when *marker* does not occur at all, so callers
        can tell "no section" apart from "section without links".
    """
    start = html.find(marker)
    if start == -1:
        return None
    end = html.find(marker, start + len(marker))
    return html[start:] if end == -1 else html[start:end]


driver = make_driver()

# try/finally guarantees the browser process is shut down even when a page
# load or any later step raises.
try:
    for month_num in [1, 4, 7]:
        url = f"{BASE_URL}/?zone={ZONE_PARAM}&m={month_num}"
        print(f"\n=== Month {month_num} ===")
        driver.get(url)
        # Fixed pause before reading the page source, giving the page time
        # to finish rendering.
        time.sleep(4)
        html = driver.page_source

        # Count plant-list occurrences
        count = html.count(MARKER)
        print(f" plant-list divs found: {count}")

        # Slice to first only
        chunk = _first_section(html, MARKER)
        if chunk is None:
            # Without the marker there is nothing meaningful to slice; say so
            # instead of reporting an empty link list from a bogus chunk.
            print(" plant-list marker not found; skipping link extraction")
            continue

        # Count plant links in slice
        links = _PLANT_LINK_RE.findall(chunk)
        # Deduplicate (thumbnail + name link = 2 per plant), keeping the
        # order in which the plants first appear.
        unique = list(dict.fromkeys(links))
        print(f" Plant links in first section: {unique}")
finally:
    driver.quit()
|
||||
Reference in New Issue
Block a user