mirror of
https://github.com/Kizuren/LidlConnect.py.git
synced 2025-12-23 05:56:23 +01:00
39 lines
No EOL
1.7 KiB
Python
39 lines
No EOL
1.7 KiB
Python
"""HTML and data extraction for Lidl Connect API."""
|
|
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
from typing import Tuple
|
|
|
|
class ExtractorMixin:
    """HTML extraction methods for Lidl Connect API.

    This is a mixin: the host class must provide ``self.session``, an object
    whose ``get(url)`` returns a response exposing the body as ``.text``.
    """

    def _get_soup(self, url: str) -> BeautifulSoup:
        """Fetch *url* through ``self.session`` and parse the body as HTML.

        Args:
            url: Address of the page to download.

        Returns:
            The response text parsed with the ``html.parser`` backend.
        """
        # NOTE(review): the response status is not checked here, so an error
        # page is parsed like any other document -- confirm callers expect it.
        response = self.session.get(url)
        return BeautifulSoup(response.text, "html.parser")

    def _extract_csrf(self, soup: BeautifulSoup) -> str:
        """Extract CSRF token from dashboard HTML.

        Args:
            soup: Parsed document to search.

        Returns:
            The ``content`` attribute of ``<meta name="csrf-token">``.

        Raises:
            ValueError: If the meta tag is missing or its ``content`` is
                absent or empty.
        """
        meta = soup.find("meta", {"name": "csrf-token"})
        token = meta.get("content") if meta else None
        if not token:
            raise ValueError("CSRF token not found in dashboard HTML")
        return token

    def _extract_user_and_endpoint(self, soup: BeautifulSoup) -> Tuple[int, int]:
        """Extract user ID and endpoint ID from dashboard HTML.

        The text of every ``<script>`` element is concatenated and searched
        with regular expressions.

        Args:
            soup: Parsed document to search.

        Returns:
            A ``(user_id, endpoint_id)`` tuple of integers.

        Raises:
            ValueError: If either ID cannot be found in the script text.
        """
        # Preferred form: the ID sits right after ``'user': {`` inside a
        # ``window.user = {...}`` assignment.
        strict_user_pattern = r"window\.user\s*=\s*\{.*?'user':\s*\{\s*\"id\":\s*(\d+).*?\"userType\":\s*\"CUSTOMER\""
        # Fallback form: any ``"id"`` that is later followed by a CUSTOMER
        # ``userType``, without requiring the ``window.user`` wrapper.
        loose_user_pattern = r'"id":\s*(\d+).*?"userType":\s*"CUSTOMER"'
        endpoint_pattern = r'"endpoints":\s*\[\{\s*"id":\s*(\d+)'

        # ``.string`` is falsy for scripts without a single text child (for
        # example ``<script src=...>``); those contribute nothing.
        script_texts = [script.string for script in soup.find_all("script")]
        all_scripts = "".join(text for text in script_texts if text)

        # There is only one endpoint pattern, so one search is enough; only
        # the user ID has a looser alternative worth retrying.
        endpoint_match = re.search(endpoint_pattern, all_scripts, re.DOTALL)
        user_match = (
            re.search(strict_user_pattern, all_scripts, re.DOTALL)
            or re.search(loose_user_pattern, all_scripts, re.DOTALL)
        )

        if not (user_match and endpoint_match):
            raise ValueError("Could not extract userId or endpointId from scripts")

        return int(user_match.group(1)), int(endpoint_match.group(1))