Default

From Helix Project Wiki
Revision as of 20:27, 6 October 2025 by Steve Helix (talk | contribs) (Created page with "type=code lang=python import requests, re, json from bs4 import BeautifulSoup from urllib.parse import urljoin, urlparse BASE = "https://helixprojectai.com" MAIN = f"{BASE}/wiki/index.php/Main_Page" def fetch(url): r = requests.get(url, timeout=15) r.raise_for_status() return r.text # 1️⃣ fetch main page html = fetch(MAIN) soup = BeautifulSoup(html, "html.parser") # 2️⃣ collect classes & ids classes = set() ids = set() for tag in soup.find_all(Tru...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

type=code lang=python
import requests, re, json
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

# Site root, and the wiki main page (built from BASE) that the script inspects.
BASE = "https://helixprojectai.com"
MAIN = f"{BASE}/wiki/index.php/Main_Page"

def fetch(url: str, *, timeout: float = 15) -> str:
    """Download *url* with an HTTP GET and return the response body as text.

    Args:
        url: URL to request.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            argument. Defaults to 15, the value previously hard-coded here.

    Returns:
        The response's ``text`` attribute.

    Raises:
        requests.HTTPError: From ``raise_for_status()`` when the server
            answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of returning an error page as content.
    response.raise_for_status()
    return response.text
# 1️⃣ fetch main page

# Download the main page and parse it with the "html.parser" backend.
html = fetch(MAIN)
soup = BeautifulSoup(html, "html.parser")

# 2️⃣ collect classes & ids

# Every distinct class name and element id that appears anywhere on the page.
classes = set()
ids = set()
for tag in soup.find_all(True):  # True matches every tag in the document
    if tag.has_attr("class"):
        # The class attribute is iterated item by item; each item is one name.
        classes.update(name.strip() for name in tag["class"])
    if tag.has_attr("id"):
        ids.add(tag["id"].strip())
# 3️⃣ find linked stylesheets

# Absolute URLs of every <link> whose rel value contains "stylesheet".
# urljoin resolves relative hrefs against the main page URL; links without a
# (non-empty) href are skipped.
stylesheets = [
    urljoin(MAIN, link["href"])
    for link in soup.find_all("link", rel=lambda x: x and "stylesheet" in x)
    if link.get("href")
]
# 4️⃣ fetch each stylesheet and pull selectors that match our classes/ids

# CSS comments are removed before scanning so they cannot end up glued to the
# front of a selector.
_CSS_COMMENT_RE = re.compile(r"/\*.*?\*/", re.DOTALL)
# Text immediately preceding a `{`. Excluding `}` as well as `{` keeps the
# previous rule's declarations out of the captured selector list; without
# that, only the very first rule of a stylesheet could ever match.
_SELECTOR_BLOCK_RE = re.compile(r"([^{}]+)\{")


def _matching_selectors(css_text, known_classes, known_ids):
    """Return the selectors in *css_text* that name a known class or id.

    Only bare selectors of the form ``.name`` or ``#name`` are recognised;
    compound selectors such as ``.a .b`` or ``.a:hover`` are not matched.

    Args:
        css_text: Raw stylesheet source.
        known_classes: Class names (without the leading ``.``) to look for.
        known_ids: Element ids (without the leading ``#``) to look for.

    Returns:
        A set of matching selectors, each including its ``.``/``#`` prefix.
    """
    matched = set()
    stripped_css = _CSS_COMMENT_RE.sub("", css_text)
    for selector_block in _SELECTOR_BLOCK_RE.findall(stripped_css):
        # A rule may list several selectors separated by commas.
        for sel in selector_block.split(","):
            sel = sel.strip()
            name = sel[1:]  # strip leading . or #
            if (sel.startswith(".") and name in known_classes) or (
                sel.startswith("#") and name in known_ids
            ):
                matched.add(sel)
    return matched


selector_patterns = set()
css_contents = {}  # stylesheet URL -> downloaded CSS text
for css_url in stylesheets:
    # Best effort: a stylesheet that cannot be downloaded is reported and
    # skipped. Only the download is guarded, so the message stays accurate.
    try:
        css_text = fetch(css_url)
    except requests.RequestException as e:
        print(f"⚠️ Could not fetch {css_url}: {e}")
        continue
    css_contents[css_url] = css_text
    selector_patterns |= _matching_selectors(css_text, classes, ids)
# 5️⃣ Summarize

# The counts cover everything that was found; the listings carry only a
# sorted sample (first 30 classes/ids, first 40 selectors).
class_sample = sorted(classes)[:30]
id_sample = sorted(ids)[:30]
selector_sample = sorted(selector_patterns)[:40]

summary = dict(
    page_url=MAIN,
    num_classes_found=len(classes),
    num_ids_found=len(ids),
    unique_classes=class_sample,
    unique_ids=id_sample,
    linked_stylesheets=stylesheets,
    extracted_selectors=selector_sample,
)

# Emit the report as pretty-printed JSON on stdout.
report_text = json.dumps(summary, indent=2)
print(report_text)