Archivage-ZIM/app.py
2025-10-31 23:37:33 +01:00

183 lines
5.6 KiB
Python

import json
import os
import shlex
import subprocess
import threading
import uuid
from datetime import datetime

from flask import Flask, render_template, request, jsonify
# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Host directory that run_zimit() bind-mounts into the container as /output.
OUTPUT_DIR = "/srv/kiwix/zim"
# JSON file the task table is persisted to by save_tasks() (relative path).
TASKS_FILE = "tasks.json"
# Text file that log_event() appends to (relative path).
LOG_FILE = "zimit_archives.log"
# In-memory task table: task id -> dict of task fields ("site", "name",
# "title", "status", "date", and a live "process" handle while running).
tasks = {}
# ---------- Utilities ----------
def save_tasks():
    """Persist the in-memory task table to ``TASKS_FILE`` as JSON.

    The ``"process"`` entry of each task (a live process handle) is left out
    because it cannot be serialised.
    """
    # Snapshot the table and each task first: worker threads and request
    # handlers mutate them concurrently, and iterating a dict while it
    # changes size raises RuntimeError.
    serializable = {
        tid: {k: v for k, v in dict(t).items() if k != "process"}
        for tid, t in list(tasks.items())
    }
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # pinned instead of being left to the platform locale.
    with open(TASKS_FILE, "w", encoding="utf-8") as f:
        json.dump(serializable, f, indent=2, ensure_ascii=False)
def load_tasks():
    """Restore unfinished tasks from ``TASKS_FILE`` into the global table.

    Tasks whose saved status is final ("terminee", "terminee (non trouve)",
    "annulee") are dropped. A task saved as "en cours" is relabelled
    "inconnue" because its process handle is never persisted. The pruned
    table is then written back with ``save_tasks()``.

    A missing file is silently ignored. An unreadable or malformed file is
    logged and ignored, leaving the table as it was, so a damaged file does
    not prevent the application from starting.
    """
    try:
        with open(TASKS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        log_event(f"Could not load {TASKS_FILE}: {exc}")
        return

    if not isinstance(data, dict):
        log_event(f"Could not load {TASKS_FILE}: top-level value is not an object")
        return

    finished = ("terminee", "terminee (non trouve)", "annulee")
    for tid, t in data.items():
        if not isinstance(t, dict):
            continue
        saved_status = t.get("status")
        if saved_status in finished:
            continue
        if saved_status == "en cours":
            t["status"] = "inconnue"
        tasks[tid] = t
    save_tasks()
def log_event(message):
    """Append *message* to ``LOG_FILE`` prefixed with a local timestamp.

    Each call writes one line of the form
    ``[YYYY/MM/DD - HHhMMmSSs] message``.
    """
    date_str = datetime.now().strftime("%Y/%m/%d - %Hh%Mm%Ss")
    # Messages carry user-supplied titles and URLs; pin UTF-8 so non-ASCII
    # text cannot raise UnicodeEncodeError under a non-UTF-8 locale.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(f"[{date_str}] {message}\n")
# ---------- Core ----------
def _build_zimit_command(site, name, title, page_limit, workers):
    """Return the ``docker run`` argument vector for one zimit crawl."""
    # Each value is its own argv element and no shell is involved, so the
    # user-supplied strings need no quoting and cannot inject commands.
    return [
        "docker", "run", "--rm", "-v", f"{OUTPUT_DIR}:/output",
        "ghcr.io/openzim/zimit", "zimit",
        f"--seeds={site}",
        f"--name={name}",
        f"--title={title}",
        "--output=/output",
        "--waitUntil=networkidle0",
        f"--pageLimit={page_limit}",
        f"--workers={workers}",
        r"--scopeExcludeRx=(\?q=|signup-landing\?|\?cid=)",
    ]


def run_zimit(task_id, site, name, title, page_limit, workers):
    """Run one zimit crawl in Docker and record its outcome on the task.

    Intended to run in a worker thread. The task's status moves to
    "en cours" and then to "terminee", "terminee (non trouve)" or "erreur";
    a status of "annulee" set by a concurrent cancellation is left as is.
    Progress is written through ``log_event`` and ``save_tasks``.

    Args:
        task_id: Key of the task in the global ``tasks`` table.
        site: Seed URL to crawl.
        name: ZIM name; also used to look for ``<name>.zim`` afterwards.
        title: ZIM title.
        page_limit: Maximum number of pages, passed as ``--pageLimit``.
        workers: Number of crawler workers, passed as ``--workers``.
    """
    # Keep a reference to the task dict: if the entry is removed from the
    # table while the crawl runs, updates land on the orphaned dict instead
    # of raising KeyError in this thread.
    task = tasks.get(task_id)
    if task is None or task.get("status") == "annulee":
        log_event(f"Skipped: name='{name}' was cancelled or removed before start")
        return

    cmd = _build_zimit_command(site, name, title, page_limit, workers)
    # Shell-quoted rendering for display only; the list itself is executed.
    cmd_str = " ".join(shlex.quote(arg) for arg in cmd)
    print(">>> Docker command to execute:")
    print(cmd_str)
    log_event(f"Start: name='{name}', title='{title}', site='{site}'")
    log_event(f"Command: {cmd_str}")
    task["status"] = "en cours"
    save_tasks()
    try:
        # stdout is never read, so discard it rather than buffer it in memory.
        process = subprocess.Popen(
            cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE
        )
        task["process"] = process
        _, stderr = process.communicate()
        if task.get("status") == "annulee":
            # The task was cancelled while running; the non-zero exit of the
            # terminated process must not be reported as an error.
            pass
        elif process.returncode == 0:
            # NOTE(review): assumes zimit writes exactly <name>.zim into the
            # output directory; confirm against the zimit version in use.
            zim_path = os.path.join(OUTPUT_DIR, f"{name}.zim")
            if os.path.exists(zim_path):
                task["status"] = "terminee"
                task["file"] = f"{name}.zim"
            else:
                task["status"] = "terminee (non trouve)"
        else:
            task["status"] = "erreur"
            log_event(f"Erreur: {stderr.decode(errors='ignore')[:500]}")
    except Exception as e:
        # Thread boundary: record and log any failure (e.g. docker missing).
        task["status"] = "erreur"
        task["error"] = str(e)
        log_event(f"Exception: {str(e)}")
    finally:
        status = task["status"]
        log_event(f"End: name='{name}', status='{status}'")
        log_event("-" * 80)
        save_tasks()
# ---------- Routes ----------
@app.route("/")
def index():
    """Serve the main page after purging finished tasks.

    Tasks whose status is "terminee", "terminee (non trouve)" or "annulee"
    are removed from the table, the table is saved, and ``index.html`` is
    rendered.
    """
    finished = ("terminee", "terminee (non trouve)", "annulee")
    # Iterate over a snapshot of the ids because entries are deleted below.
    for task_id in list(tasks.keys()):
        current = tasks[task_id]["status"]
        if current not in finished:
            continue
        del tasks[task_id]
    save_tasks()
    return render_template("index.html")
def _parse_positive_int(value, default):
    """Return *value* as a positive int, or *default* when it is None or blank.

    Raises:
        ValueError: if *value* is a bool, a non-integral float, a
            non-numeric string, or less than 1.
        TypeError: if *value* has a type ``int()`` cannot convert.
    """
    if value is None or (isinstance(value, str) and not value.strip()):
        return default
    if isinstance(value, bool):
        raise ValueError("a boolean is not a valid count")
    if isinstance(value, float) and not value.is_integer():
        raise ValueError("count must be an integer")
    number = int(value)
    if number < 1:
        raise ValueError("count must be at least 1")
    return number


@app.route("/start", methods=["POST"])
def start():
    """Create a task from the posted JSON and launch its crawl in a thread.

    Expects a JSON object with non-empty string fields ``site``, ``name`` and
    ``title``, plus optional ``page_limit`` (default 20) and ``workers``
    (default 4), which must be positive integers.

    Returns:
        ``{"id": <task id>}`` on success, or ``{"error": ...}`` with HTTP 400
        when the body is not a JSON object or a field is missing or invalid.
    """
    # silent=True yields None for a missing or invalid JSON body instead of
    # aborting inside Flask, so every bad request gets the same JSON answer.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Invalid or missing JSON body"}), 400

    # Non-string values are treated as missing rather than crashing .strip().
    fields = {}
    for key in ("site", "name", "title"):
        raw = data.get(key)
        fields[key] = raw.strip() if isinstance(raw, str) else ""
    site, name, title = fields["site"], fields["name"], fields["title"]
    if not site or not name or not title:
        return jsonify({"error": "Missing required fields"}), 400

    # These end up on the crawler command line, so only accept real counts.
    try:
        page_limit = str(_parse_positive_int(data.get("page_limit"), 20))
        workers = str(_parse_positive_int(data.get("workers"), 4))
    except (TypeError, ValueError):
        return jsonify(
            {"error": "page_limit and workers must be positive integers"}
        ), 400

    task_id = str(uuid.uuid4())
    date_str = datetime.now().strftime("%Y/%m/%d - %Hh%Mm%Ss")
    tasks[task_id] = {
        "site": site,
        "name": name,
        "title": title,
        "status": "en attente",
        "date": date_str,
    }
    save_tasks()
    thread = threading.Thread(
        target=run_zimit,
        args=(task_id, site, name, title, page_limit, workers),
    )
    thread.start()
    return jsonify({"id": task_id})
@app.route("/status")
def status():
    """Return, as JSON, every task that has not reached a final state.

    Tasks with status "terminee", "terminee (non trouve)" or "annulee" are
    omitted, and the "process" entry is stripped from each task returned.
    """
    hidden = ("terminee", "terminee (non trouve)", "annulee")
    visible = {
        task_id: {
            field: value for field, value in info.items() if field != "process"
        }
        for task_id, info in tasks.items()
        if info["status"] not in hidden
    }
    return jsonify(visible)
@app.route("/cancel/<task_id>", methods=["POST"])
def cancel(task_id):
    """Mark a task as "annulee", terminating its process if it has one.

    Returns ``{"ok": True}`` on success, or a 404 JSON error when the task id
    is unknown.
    """
    entry = tasks.get(task_id)
    if not entry:
        return jsonify({"ok": False, "error": "Task not found"}), 404

    # Only tasks started in this run of the server carry a process handle.
    handle = entry.get("process")
    if handle:
        handle.terminate()

    entry["status"] = "annulee"
    save_tasks()
    log_event(f"Task cancelled: {entry.get('name')}")
    return jsonify({"ok": True})
@app.route("/delete/<task_id>", methods=["POST"])
def delete_task(task_id):
    """Remove a task from tracking (in memory and on disk) unless it is still running."""
    entry = tasks.get(task_id)
    if not entry:
        return jsonify({"ok": False, "error": "Task not found"}), 404

    # Without a process handle the task is considered no longer running.
    proc = entry.get("process")
    state = entry.get("status", "")

    # Refuse only when the process is alive AND the task is marked running.
    still_running = bool(proc) and proc.poll() is None and state == "en cours"
    if still_running:
        return jsonify({"ok": False, "error": "Cannot delete a running task"}), 400

    # Drop the task from the table, then rewrite the JSON file.
    del tasks[task_id]
    save_tasks()
    return jsonify({"ok": True})
if __name__ == "__main__":
    # Restore unfinished tasks from disk before serving any request.
    load_tasks()
    # Listen on every interface, port 8080.
    app.run(port=8080, host="0.0.0.0")