"""HTML and data extraction for Lidl Connect API."""
from bs4 import BeautifulSoup
import re
from typing import Tuple
class ExtractorMixin:
"""HTML extraction methods for Lidl Connect API."""
def _get_soup(self, url: str) -> BeautifulSoup:
"""Get BeautifulSoup object from URL."""
r = self.session.get(url)
return BeautifulSoup(r.text, "html.parser")
def _extract_csrf(self, soup: BeautifulSoup) -> str:
"""Extract CSRF token from dashboard HTML."""
meta = soup.find("meta", {"name": "csrf-token"})
if not meta or not meta.get("content"):
raise ValueError("CSRF token not found in dashboard HTML")
return meta["content"]
def _extract_user_and_endpoint(self, soup: BeautifulSoup) -> Tuple[int, int]:
"""Extract user ID and endpoint ID from dashboard HTML."""
all_scripts = ""
for script in soup.find_all("script"):
if script.string:
all_scripts += script.string
user_match = re.search(r"window\.user\s*=\s*\{.*?'user':\s*\{\s*\"id\":\s*(\d+).*?\"userType\":\s*\"CUSTOMER\"", all_scripts, re.DOTALL)
endpoint_match = re.search(r'"endpoints":\s*\[\{\s*"id":\s*(\d+)', all_scripts, re.DOTALL)
if not user_match or not endpoint_match:
user_match = re.search(r'"id":\s*(\d+).*?"userType":\s*"CUSTOMER"', all_scripts, re.DOTALL)
endpoint_match = re.search(r'"endpoints":\s*\[\{\s*"id":\s*(\d+)', all_scripts, re.DOTALL)
if not user_match or not endpoint_match:
raise ValueError("Could not extract userId or endpointId from scripts")
return int(user_match.group(1)), int(endpoint_match.group(1))