From 5c6398eac19de98c1c34db7433cc1a66368ae96d Mon Sep 17 00:00:00 2001 From: Stefan Heyn Date: Wed, 4 Mar 2026 16:54:58 +0100 Subject: [PATCH] Reduce default login wait to 60 seconds --- README.md | 10 +++++-- docker-compose.yml | 3 +- main.py | 49 ++++++++++++++++++++++++++------ scripts/start-novnc-configure.sh | 5 ++-- startConfigure.sh | 5 ++-- 5 files changed, 56 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 4a3a883..0495d9d 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,12 @@ Explizit fuer Deutschland (Sprache/Zeitzone): python main.py configure --marketplace de --locale de-DE --timezone Europe/Berlin --currency EUR --download-dir "C:\Users\\Downloads\amazon_rechnungen" ``` +Falls Amazon trotzdem Englisch zeigt, Sprache explizit per URL erzwingen: + +```powershell +python main.py configure --marketplace de --amazon-language de_DE --locale de-DE --timezone Europe/Berlin --currency EUR +``` + Dann oeffnet sich ein Browser. Dort bei Amazon anmelden und auf Enter im Terminal druecken. Die Session wird lokal gespeichert in: @@ -76,7 +82,7 @@ Dann im Browser auf deinem PC oeffnen: http://:6080/vnc.html ``` -Im noVNC-Fenster bei Amazon einloggen. Die Session wird nach `LOGIN_WAIT_SECONDS` (Standard: 300s) automatisch gespeichert. +Im noVNC-Fenster bei Amazon einloggen. Die Session wird nach `LOGIN_WAIT_SECONDS` (Standard: 60s) automatisch gespeichert. Anschließend kannst du den noVNC-Container beenden (`Ctrl+C` im Terminal). Alternative per Hilfsskript: @@ -119,7 +125,7 @@ Optionen: - `--headless true|false`: Browser sichtbar oder unsichtbar - `--debug`: zeigt, wie viele Detailseiten und Rechnungslinks gefunden werden - `--debug-json [pfad]`: schreibt Laufdetails als JSON (ohne Pfad: Standarddatei) -- `configure --locale de-DE --timezone Europe/Berlin`: erzwingt deutsche Sprache und Berliner Zeitzone im Browser-Kontext +- `configure --locale de-DE --timezone Europe/Berlin --amazon-language de_DE`: erzwingt deutsche Sprache (inkl. `language=de_DE`) und Berliner Zeitzone ## Hinweise diff --git a/docker-compose.yml b/docker-compose.yml index dacf739..4274125 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -46,10 +46,11 @@ environment: - MARKETPLACE=de - DOWNLOAD_DIR=/downloads - - LOGIN_WAIT_SECONDS=300 + - LOGIN_WAIT_SECONDS=60 - NOVNC_PORT=6080 ports: - "6080:6080" volumes: - ./state:/root/.amazon_invoice_downloader - ./downloads:/downloads + diff --git a/main.py b/main.py index bae0979..427d5f4 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from datetime import date, datetime from pathlib import Path from typing import Optional -from urllib.parse import urljoin +from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse import dateparser from playwright.sync_api import TimeoutError as PlaywrightTimeoutError @@ -121,8 +121,16 @@ def years_for_range(start_date: date, end_date: date) -> list[int]: return list(range(end_date.year, start_date.year - 1, -1)) -def build_orders_url(marketplace: str, year: int) -> str: - return f"https://www.amazon.{marketplace}/your-orders/orders?timeFilter=year-{year}" +def with_amazon_language(url: str, amazon_language: str) -> str: + parsed = urlparse(url) + query = dict(parse_qsl(parsed.query, keep_blank_values=True)) + query["language"] = amazon_language + return urlunparse(parsed._replace(query=urlencode(query))) + + +def build_orders_url(marketplace: str, year: int, amazon_language: str) -> str: + base = f"https://www.amazon.{marketplace}/your-orders/orders?timeFilter=year-{year}" + return with_amazon_language(base, amazon_language) def text_contains_invoice_hint(text: str) -> bool: @@ -214,10 +222,20 @@ def extract_orders_from_overview(page, base_url: str, debug: bool = False) -> li return results -def extract_invoice_candidates_from_detail(context, detail_url: str, base_url: str, debug: bool = False) -> Optional[OrderInvoice]: +def extract_invoice_candidates_from_detail( + context, + detail_url: str, + base_url: str, + amazon_language: str, + debug: bool = False, +) -> Optional[OrderInvoice]: detail_page = context.new_page() try: - detail_page.goto(detail_url, wait_until="domcontentloaded", timeout=15000) + detail_page.goto( + with_amazon_language(detail_url, amazon_language), + wait_until="domcontentloaded", + timeout=15000, + ) detail_page.wait_for_timeout(1200) body_text = detail_page.inner_text("body", timeout=4000) order_date = parse_order_date_from_text(body_text) @@ -284,6 +302,7 @@ def configure(args) -> None: "locale": args.locale, "timezone": args.timezone, "currency": args.currency, + "amazon_language": args.amazon_language, } save_config(config) ensure_app_dir() @@ -292,7 +311,13 @@ def configure(args) -> None: browser = p.chromium.launch(headless=False) context = browser.new_context(**build_context_options(config)) page = context.new_page() - page.goto(f"https://www.amazon.{args.marketplace}/your-orders/orders", wait_until="domcontentloaded") + page.goto( + with_amazon_language( + f"https://www.amazon.{args.marketplace}/your-orders/orders", + args.amazon_language, + ), + wait_until="domcontentloaded", + ) print("Bitte im Browser bei Amazon einloggen.") if args.login_wait_seconds > 0: print( @@ -321,6 +346,7 @@ def download(args) -> None: raise SystemExit("'from' muss kleiner/gleich 'to' sein.") marketplace = config["marketplace"] + amazon_language = config.get("amazon_language", "de_DE") download_dir = Path(args.output or config["download_dir"]).expanduser().resolve() download_dir.mkdir(parents=True, exist_ok=True) debug_json_target = args.debug_json or (str(DEFAULT_DEBUG_JSON_PATH) if args.debug else None) @@ -348,7 +374,7 @@ def download(args) -> None: } for year in years: - filtered_url = build_orders_url(marketplace, year) + filtered_url = build_orders_url(marketplace, year, amazon_language) if args.debug: print(f"[debug] Wechsle auf Jahresfilter {year}: {filtered_url}") page.goto(filtered_url, wait_until="domcontentloaded", timeout=15000) @@ -402,7 +428,11 @@ def download(args) -> None: if not next_page_url: break try: - page.goto(next_page_url, wait_until="domcontentloaded", timeout=15000) + page.goto( + with_amazon_language(next_page_url, amazon_language), + wait_until="domcontentloaded", + timeout=15000, + ) except PlaywrightTimeoutError: break @@ -485,10 +515,11 @@ def build_parser() -> argparse.ArgumentParser: p_config.add_argument("--locale", default="de-DE", help="Browser-Locale, z. B. de-DE") p_config.add_argument("--timezone", default="Europe/Berlin", help="Zeitzone, z. B. Europe/Berlin") p_config.add_argument("--currency", default="EUR", help="Waehrungshinweis fuer Konfiguration") + p_config.add_argument("--amazon-language", default="de_DE", help="Amazon URL-Sprache, z. B. de_DE") p_config.add_argument( "--login-wait-seconds", type=int, - default=0, + default=60, help="Optional: wartet X Sekunden vor Session-Speicherung (fuer noVNC/Serverbetrieb).", ) p_config.set_defaults(func=configure) diff --git a/scripts/start-novnc-configure.sh b/scripts/start-novnc-configure.sh index a3a3f14..80b8b9d 100644 --- a/scripts/start-novnc-configure.sh +++ b/scripts/start-novnc-configure.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env bash +#!/usr/bin/env bash set -euo pipefail DISPLAY_NUM=${DISPLAY_NUM:-:99} @@ -6,7 +6,7 @@ VNC_PORT=${VNC_PORT:-5900} NOVNC_PORT=${NOVNC_PORT:-6080} MARKETPLACE=${MARKETPLACE:-de} DOWNLOAD_DIR=${DOWNLOAD_DIR:-/downloads} -LOGIN_WAIT_SECONDS=${LOGIN_WAIT_SECONDS:-300} +LOGIN_WAIT_SECONDS=${LOGIN_WAIT_SECONDS:-60} export DISPLAY="$DISPLAY_NUM" @@ -19,3 +19,4 @@ echo "noVNC bereit unter: http://:$NOVNC_PORT/vnc.html" echo "Melde dich bei Amazon an. Session wird nach $LOGIN_WAIT_SECONDS Sekunden gespeichert." python /app/main.py configure --marketplace "$MARKETPLACE" --download-dir "$DOWNLOAD_DIR" --login-wait-seconds "$LOGIN_WAIT_SECONDS" + diff --git a/startConfigure.sh b/startConfigure.sh index 69fde17..2519b29 100755 --- a/startConfigure.sh +++ b/startConfigure.sh @@ -1,11 +1,11 @@ -#!/usr/bin/env bash +#!/usr/bin/env bash set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" MARKETPLACE="${MARKETPLACE:-de}" -LOGIN_WAIT_SECONDS="${LOGIN_WAIT_SECONDS:-300}" +LOGIN_WAIT_SECONDS="${LOGIN_WAIT_SECONDS:-60}" NOVNC_PORT="${NOVNC_PORT:-6080}" DOWNLOAD_DIR="${DOWNLOAD_DIR:-/downloads}" @@ -20,3 +20,4 @@ echo "Starte noVNC-Configure auf Port ${NOVNC_PORT} ..." echo "Oeffne im Browser: http://:${NOVNC_PORT}/vnc.html" docker compose --profile configure-novnc up --build amazon-invoice-configure-novnc +