Reduce default login wait to 60 seconds

This commit is contained in:
Stefan Heyn 2026-03-04 16:54:58 +01:00
parent 390ffe1d40
commit 5c6398eac1
5 changed files with 56 additions and 16 deletions

View file

@ -30,6 +30,12 @@ Explizit fuer Deutschland (Sprache/Zeitzone):
python main.py configure --marketplace de --locale de-DE --timezone Europe/Berlin --currency EUR --download-dir "C:\Users\<USER>\Downloads\amazon_rechnungen"
```
Falls Amazon trotzdem Englisch zeigt, Sprache explizit per URL erzwingen:
```powershell
python main.py configure --marketplace de --amazon-language de_DE --locale de-DE --timezone Europe/Berlin --currency EUR
```
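Dann oeffnet sich ein Browser. Dort bei Amazon anmelden. Standardmaessig wird die Session nach 60 Sekunden automatisch gespeichert; mit `--login-wait-seconds 0` stattdessen nach dem Login auf Enter im Terminal druecken.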
Die Session wird lokal gespeichert in:
@ -76,7 +82,7 @@ Dann im Browser auf deinem PC oeffnen:
http://<SERVER-IP>:6080/vnc.html
```
Im noVNC-Fenster bei Amazon einloggen. Die Session wird nach `LOGIN_WAIT_SECONDS` (Standard: 300s) automatisch gespeichert.
Im noVNC-Fenster bei Amazon einloggen. Die Session wird nach `LOGIN_WAIT_SECONDS` (Standard: 60s) automatisch gespeichert.
Anschließend kannst du den noVNC-Container beenden (`Ctrl+C` im Terminal).
Alternative per Hilfsskript:
@ -119,7 +125,7 @@ Optionen:
- `--headless true|false`: Browser sichtbar oder unsichtbar
- `--debug`: zeigt, wie viele Detailseiten und Rechnungslinks gefunden werden
- `--debug-json [pfad]`: schreibt Laufdetails als JSON (ohne Pfad: Standarddatei)
- `configure --locale de-DE --timezone Europe/Berlin`: erzwingt deutsche Sprache und Berliner Zeitzone im Browser-Kontext
- `configure --locale de-DE --timezone Europe/Berlin --amazon-language de_DE`: erzwingt deutsche Sprache (inkl. `language=de_DE`) und Berliner Zeitzone
## Hinweise

View file

@ -46,10 +46,11 @@
environment:
- MARKETPLACE=de
- DOWNLOAD_DIR=/downloads
- LOGIN_WAIT_SECONDS=300
- LOGIN_WAIT_SECONDS=60
- NOVNC_PORT=6080
ports:
- "6080:6080"
volumes:
- ./state:/root/.amazon_invoice_downloader
- ./downloads:/downloads

49
main.py
View file

@ -6,7 +6,7 @@ from dataclasses import dataclass
from datetime import date, datetime
from pathlib import Path
from typing import Optional
from urllib.parse import urljoin
from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
import dateparser
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
@ -121,8 +121,16 @@ def years_for_range(start_date: date, end_date: date) -> list[int]:
return list(range(end_date.year, start_date.year - 1, -1))
def build_orders_url(marketplace: str, year: int) -> str:
return f"https://www.amazon.{marketplace}/your-orders/orders?timeFilter=year-{year}"
def with_amazon_language(url: str, amazon_language: str) -> str:
    """Return *url* with its ``language`` query parameter set to *amazon_language*.

    An existing ``language`` parameter is overwritten at its original
    position (any further occurrences are dropped); if none exists, the
    parameter is appended. All other query parameters are kept in their
    original order, including repeated keys and blank values.

    Args:
        url: Absolute or relative URL, with or without a query string.
        amazon_language: Value for the ``language`` parameter, e.g. ``de_DE``.

    Returns:
        The rebuilt URL; scheme, host, path and fragment are left untouched.
    """
    parsed = urlparse(url)
    pairs = []
    replaced = False
    # Work on the list of pairs rather than a dict: a dict would silently
    # collapse repeated keys such as ``a=1&a=2`` into a single entry.
    for key, value in parse_qsl(parsed.query, keep_blank_values=True):
        if key != "language":
            pairs.append((key, value))
        elif not replaced:
            pairs.append((key, amazon_language))
            replaced = True
    if not replaced:
        pairs.append(("language", amazon_language))
    return urlunparse(parsed._replace(query=urlencode(pairs)))
def build_orders_url(marketplace: str, year: int, amazon_language: str) -> str:
    """Build the order-history URL for one year on the given marketplace.

    The URL points at ``/your-orders/orders`` on ``www.amazon.<marketplace>``
    with a ``timeFilter=year-<year>`` query, and is then passed through
    ``with_amazon_language`` together with *amazon_language*.
    """
    return with_amazon_language(
        f"https://www.amazon.{marketplace}/your-orders/orders?timeFilter=year-{year}",
        amazon_language,
    )
def text_contains_invoice_hint(text: str) -> bool:
@ -214,10 +222,20 @@ def extract_orders_from_overview(page, base_url: str, debug: bool = False) -> li
return results
def extract_invoice_candidates_from_detail(context, detail_url: str, base_url: str, debug: bool = False) -> Optional[OrderInvoice]:
def extract_invoice_candidates_from_detail(
context,
detail_url: str,
base_url: str,
amazon_language: str,
debug: bool = False,
) -> Optional[OrderInvoice]:
detail_page = context.new_page()
try:
detail_page.goto(detail_url, wait_until="domcontentloaded", timeout=15000)
detail_page.goto(
with_amazon_language(detail_url, amazon_language),
wait_until="domcontentloaded",
timeout=15000,
)
detail_page.wait_for_timeout(1200)
body_text = detail_page.inner_text("body", timeout=4000)
order_date = parse_order_date_from_text(body_text)
@ -284,6 +302,7 @@ def configure(args) -> None:
"locale": args.locale,
"timezone": args.timezone,
"currency": args.currency,
"amazon_language": args.amazon_language,
}
save_config(config)
ensure_app_dir()
@ -292,7 +311,13 @@ def configure(args) -> None:
browser = p.chromium.launch(headless=False)
context = browser.new_context(**build_context_options(config))
page = context.new_page()
page.goto(f"https://www.amazon.{args.marketplace}/your-orders/orders", wait_until="domcontentloaded")
page.goto(
with_amazon_language(
f"https://www.amazon.{args.marketplace}/your-orders/orders",
args.amazon_language,
),
wait_until="domcontentloaded",
)
print("Bitte im Browser bei Amazon einloggen.")
if args.login_wait_seconds > 0:
print(
@ -321,6 +346,7 @@ def download(args) -> None:
raise SystemExit("'from' muss kleiner/gleich 'to' sein.")
marketplace = config["marketplace"]
amazon_language = config.get("amazon_language", "de_DE")
download_dir = Path(args.output or config["download_dir"]).expanduser().resolve()
download_dir.mkdir(parents=True, exist_ok=True)
debug_json_target = args.debug_json or (str(DEFAULT_DEBUG_JSON_PATH) if args.debug else None)
@ -348,7 +374,7 @@ def download(args) -> None:
}
for year in years:
filtered_url = build_orders_url(marketplace, year)
filtered_url = build_orders_url(marketplace, year, amazon_language)
if args.debug:
print(f"[debug] Wechsle auf Jahresfilter {year}: {filtered_url}")
page.goto(filtered_url, wait_until="domcontentloaded", timeout=15000)
@ -402,7 +428,11 @@ def download(args) -> None:
if not next_page_url:
break
try:
page.goto(next_page_url, wait_until="domcontentloaded", timeout=15000)
page.goto(
with_amazon_language(next_page_url, amazon_language),
wait_until="domcontentloaded",
timeout=15000,
)
except PlaywrightTimeoutError:
break
@ -485,10 +515,11 @@ def build_parser() -> argparse.ArgumentParser:
p_config.add_argument("--locale", default="de-DE", help="Browser-Locale, z. B. de-DE")
p_config.add_argument("--timezone", default="Europe/Berlin", help="Zeitzone, z. B. Europe/Berlin")
p_config.add_argument("--currency", default="EUR", help="Waehrungshinweis fuer Konfiguration")
p_config.add_argument("--amazon-language", default="de_DE", help="Amazon URL-Sprache, z. B. de_DE")
p_config.add_argument(
"--login-wait-seconds",
type=int,
default=0,
default=60,
help="Optional: wartet X Sekunden vor Session-Speicherung (fuer noVNC/Serverbetrieb).",
)
p_config.set_defaults(func=configure)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env bash
#!/usr/bin/env bash
set -euo pipefail
DISPLAY_NUM=${DISPLAY_NUM:-:99}
@ -6,7 +6,7 @@ VNC_PORT=${VNC_PORT:-5900}
NOVNC_PORT=${NOVNC_PORT:-6080}
MARKETPLACE=${MARKETPLACE:-de}
DOWNLOAD_DIR=${DOWNLOAD_DIR:-/downloads}
LOGIN_WAIT_SECONDS=${LOGIN_WAIT_SECONDS:-300}
LOGIN_WAIT_SECONDS=${LOGIN_WAIT_SECONDS:-60}
export DISPLAY="$DISPLAY_NUM"
@ -19,3 +19,4 @@ echo "noVNC bereit unter: http://<SERVER-IP>:$NOVNC_PORT/vnc.html"
echo "Melde dich bei Amazon an. Session wird nach $LOGIN_WAIT_SECONDS Sekunden gespeichert."
python /app/main.py configure --marketplace "$MARKETPLACE" --download-dir "$DOWNLOAD_DIR" --login-wait-seconds "$LOGIN_WAIT_SECONDS"

View file

@ -1,11 +1,11 @@
#!/usr/bin/env bash
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
MARKETPLACE="${MARKETPLACE:-de}"
LOGIN_WAIT_SECONDS="${LOGIN_WAIT_SECONDS:-300}"
LOGIN_WAIT_SECONDS="${LOGIN_WAIT_SECONDS:-60}"
NOVNC_PORT="${NOVNC_PORT:-6080}"
DOWNLOAD_DIR="${DOWNLOAD_DIR:-/downloads}"
@ -20,3 +20,4 @@ echo "Starte noVNC-Configure auf Port ${NOVNC_PORT} ..."
echo "Oeffne im Browser: http://<SERVER-IP>:${NOVNC_PORT}/vnc.html"
docker compose --profile configure-novnc up --build amazon-invoice-configure-novnc