import os
import re
import subprocess
import requests
import random
import string
from urllib.parse import urlparse

# File extensions (lower-case, leading dot included) that is_text_file()
# accepts as text documents to be scanned for image links.
TEXT_EXTENSIONS = {'.md', '.markdown', '.txt', '.rst', '.adoc', '.html'}
# File extensions (lower-case, leading dot included) treated as images when
# looking for unreferenced files to move into images/old.
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg'}
# Name of the log file written at the repository root; it lists the URLs that
# were skipped (non-image response) or could not be downloaded.
LOG_FILENAME = "no_images.log"

def get_repo_name_from_url(repo_url):
    """Return the repository name encoded in a clone URL.

    The name is the last path component of the URL with an optional
    ``.git`` suffix removed, which matches the folder name ``git clone``
    would pick by default.

    Args:
        repo_url: Clone URL, e.g. ``https://host/user/repo.git``.

    Returns:
        The repository name, or an empty string when the URL has no path
        component to derive it from.
    """
    # Trailing slashes would otherwise make basename() return "".
    path = urlparse(repo_url).path.rstrip("/")
    name = os.path.basename(path)
    # Strip only the conventional ".git" suffix: splitext() would also eat
    # genuine parts of the name such as the ".io" in "socket.io".
    if name.endswith(".git"):
        name = name[:-len(".git")]
    return name

def is_text_file(filename):
    """Tell whether *filename* has one of the known text-document extensions.

    The comparison is case-insensitive and only looks at the final extension
    of the name.
    """
    _, suffix = os.path.splitext(filename)
    return suffix.lower() in TEXT_EXTENSIONS

def generate_unique_filename(extension, existing_names):
    """Build a random file name that is not already in *existing_names*.

    The name is 20 random characters (lower-case ASCII letters and digits)
    followed by *extension*, which is appended verbatim. New candidates are
    drawn until one is absent from *existing_names*; the collection itself is
    not modified.
    """
    alphabet = string.ascii_lowercase + string.digits
    candidate = "".join(random.choices(alphabet, k=20)) + extension
    while candidate in existing_names:
        candidate = "".join(random.choices(alphabet, k=20)) + extension
    return candidate

def mimetotype_to_extension(mime):
    """Map an image MIME type to a file extension (without the leading dot).

    Accepts either a bare media type (``image/png``) or a full
    ``Content-Type`` header value (``image/svg+xml; charset=utf-8``); the
    lookup ignores parameters, surrounding whitespace and letter case.

    Args:
        mime: Media type or Content-Type header value. ``None`` and the empty
            string are treated as unknown.

    Returns:
        The matching extension, or ``"img"`` when the type is not known.
    """
    mapping = {
        "image/jpeg": "jpg",
        "image/png": "png",
        "image/gif": "gif",
        "image/webp": "webp",
        "image/svg+xml": "svg",
    }
    # Servers commonly append parameters ("; charset=utf-8") and media types
    # are case-insensitive, so normalise before looking the value up.
    media_type = (mime or "").split(";", 1)[0].strip().lower()
    return mapping.get(media_type, "img")

def download_and_replace_images(repo_path):
    """Download remote images referenced in text files and relink them locally.

    Walks ``repo_path`` (the ``.git`` directory is skipped) and, in every file
    accepted by is_text_file(), looks for Markdown links/images
    (``[..](url)`` / ``![..](url)``) and HTML ``<img src="...">`` tags whose
    target starts with ``http``. Each distinct URL is fetched once; when the
    response is an image it is saved under ``<repo_path>/images`` with a
    random unique name and every reference is rewritten to a path relative to
    the referencing file, always using ``/`` as separator. URLs that fail or
    do not return an image are left untouched and listed in
    ``<repo_path>/<LOG_FILENAME>``. Finally move_all_unused_images_to_old()
    is called on the repository.

    Args:
        repo_path: Root folder of the cloned repository.
    """
    images_dir = os.path.join(repo_path, "images")
    os.makedirs(images_dir, exist_ok=True)

    modified_files = []
    logged_urls = []
    downloaded_files = set()
    # URL -> absolute path of the saved image, or None when the URL was
    # rejected or failed. Avoids fetching and storing the same image once per
    # reference.
    url_cache = {}
    log_path = os.path.join(repo_path, LOG_FILENAME)

    # "!?" makes the pattern match plain links as well as images; whether the
    # target really is an image is decided from the HTTP response.
    pattern_md = re.compile(r'(!?\[.*?\]\()(.+?)(\))')
    pattern_img_tag = re.compile(r'(<img[^>]*?src=["\'])(https?[^"\']+)(["\'])')

    def fetch_image(url):
        """Download *url* into images_dir; return its local path or None."""
        if url in url_cache:
            return url_cache[url]

        saved_path = None
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()

            content_type = r.headers.get("Content-Type", "")
            # Ignore header parameters ("; charset=...") and letter case.
            media_type = content_type.split(";", 1)[0].strip().lower()
            if not media_type.startswith("image/"):
                reason = f"Type no-image ({content_type})"
                logged_urls.append((url, reason))
                print(f"⚠️ Ignored : {url} — {reason}")
            else:
                ext = os.path.splitext(urlparse(url).path)[1]
                if not ext:
                    ext = mimetotype_to_extension(media_type)
                if not ext.startswith('.'):
                    ext = '.' + ext

                filename = generate_unique_filename(ext, downloaded_files)
                downloaded_files.add(filename)
                local_path = os.path.join(images_dir, filename)

                with open(local_path, "wb") as img_file:
                    img_file.write(r.content)
                print(f"✅ Image downloaded : {filename}")
                saved_path = local_path
        except Exception as e:
            # Deliberately broad: one bad URL must never abort the migration.
            reason = f"Error : {e}"
            logged_urls.append((url, reason))
            print(f"❌ Download error {url} — {e}")

        url_cache[url] = saved_path
        return saved_path

    for root, dirs, files in os.walk(repo_path):
        # Prune Git metadata in place so os.walk() never descends into it.
        dirs[:] = [d for d in dirs if d != ".git"]

        for file in files:
            if not is_text_file(file):
                continue
            full_path = os.path.join(root, file)

            # newline="" keeps the file's original line endings on rewrite.
            with open(full_path, "r", encoding="utf-8", errors="ignore",
                      newline="") as f:
                content = f.read()

            def relink(match, base_dir=root):
                """Swap the URL in *match* for a local relative path."""
                url = match.group(2)
                if not url.startswith("http"):
                    return match.group(0)
                local_path = fetch_image(url)
                if local_path is None:
                    return match.group(0)
                # Links must use "/" even when the OS separator is "\".
                rel_path = os.path.relpath(local_path, base_dir).replace(os.sep, "/")
                return f"{match.group(1)}{rel_path}{match.group(3)}"

            new_content = pattern_md.sub(relink, content)
            new_content = pattern_img_tag.sub(relink, new_content)

            # The text only changes when at least one link was relinked.
            if new_content != content:
                with open(full_path, "w", encoding="utf-8", newline="") as f:
                    f.write(new_content)
                modified_files.append(full_path)
                print(f"✒️ Modified file : {full_path}")

    if logged_urls:
        with open(log_path, "w", encoding="utf-8") as log_file:
            for url, reason in logged_urls:
                log_file.write(f"{url} - {reason}\n")
        print(f"📝 Log file created : {log_path}")

    print(f"\n✅ {len(modified_files)} Modified file(s).")

    move_all_unused_images_to_old(repo_path)

def move_all_unused_images_to_old(repo_path):
    """Move images that no text file mentions into ``images/old``.

    Collects every file under ``repo_path`` whose extension is in
    IMAGE_EXTENSIONS, then reads every file accepted by is_text_file() and
    marks an image as referenced when its file name, plain or
    percent-encoded, occurs anywhere in the text. Images that are never
    mentioned are renamed into ``<repo_path>/images/old``; a numeric suffix is
    appended when the target name is already taken. The ``.git`` directory
    and ``images/old`` itself are never scanned.

    The reference test is a plain substring match on the file name, so it
    errs on the side of keeping images.

    Args:
        repo_path: Root folder of the repository to clean up.
    """
    # Local import: this block needs quote(), which the module header does
    # not import.
    from urllib.parse import quote

    images_dir = os.path.join(repo_path, "images")
    old_dir = os.path.join(images_dir, "old")
    os.makedirs(old_dir, exist_ok=True)
    old_dir_abs = os.path.abspath(old_dir)

    def walk_repo():
        """Yield (root, files) for the repo, skipping .git and images/old."""
        for root, dirs, files in os.walk(repo_path):
            # Compare directory names exactly: a substring test on the path
            # would also skip ".github" or a repo called "user.github.io".
            dirs[:] = [
                d for d in dirs
                if d != ".git"
                and os.path.abspath(os.path.join(root, d)) != old_dir_abs
            ]
            yield root, files

    all_image_paths = []
    for root, files in walk_repo():
        for file in files:
            if os.path.splitext(file)[1].lower() in IMAGE_EXTENSIONS:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, repo_path)
                all_image_paths.append((rel_path, full_path))

    # Strings whose presence in a document counts as a reference. The file
    # name is a suffix of the relative path, so matching the name also covers
    # matches of the full path; the quoted form catches links written as
    # "my%20image.png".
    needles = {}
    for rel_path, _ in all_image_paths:
        base_name = os.path.basename(rel_path)
        needles[rel_path] = {base_name, quote(base_name)}

    referenced = set()
    for root, files in walk_repo():
        for file in files:
            if not is_text_file(file):
                continue
            full_path = os.path.join(root, file)
            with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
            for rel_path, names in needles.items():
                if rel_path in referenced:
                    continue
                if any(name in content for name in names):
                    referenced.add(rel_path)

    moved = 0
    for rel_path, full_path in all_image_paths:
        if rel_path in referenced:
            continue
        filename = os.path.basename(full_path)
        name, ext = os.path.splitext(filename)
        target_path = os.path.join(old_dir, filename)

        # Never overwrite a file that is already archived.
        count = 1
        while os.path.exists(target_path):
            target_path = os.path.join(old_dir, f"{name}_{count}{ext}")
            count += 1

        os.rename(full_path, target_path)
        print(f"↴ Image moved : {rel_path} → images/old/{os.path.basename(target_path)}")
        moved += 1

    print(f"\n↴ {moved} unreferenced image(s) moved in images/old/")

def main():
    """Interactively clone a Git repository and localise its remote images.

    Prompts for the repository URL and an existing destination folder, clones
    the repository into ``<destination>/<repository name>`` and then runs
    download_and_replace_images() on the clone. Every problem (missing
    destination, unusable URL, existing target folder, failed clone) is
    reported with a message and the function returns early.
    """
    print("")
    print("▄▖▘▗   ▖  ▖▄▖▄▖")
    print("▌ ▌▜▘▄▖▛▖▞▌▙▘▐ ")
    print("▙▌▌▐▖  ▌▝ ▌▌▌▟▖")
    print("")
    print("Ce programme python récupère un dépot git avec toute les images et les enregistres localement en mettant à jour les liens vers celles-ci.")
    print("License: CC BY-NC-SA 4.0")
    print("")

    repo_url = input("➤ Enter the URL of the GitHub repository to migrate (*.git) : ").strip()
    destination = input("➤ Enter the absolute path of the destination folder : ").strip()

    if not os.path.isdir(destination):
        print("❌ The specified folder does not exist.")
        return

    repo_name = get_repo_name_from_url(repo_url)
    if not repo_name:
        # Without this guard repo_path would equal the destination folder and
        # the misleading "already exists" message below would be shown.
        print("❌ Could not determine the repository name from the URL.")
        return
    repo_path = os.path.join(destination, repo_name)

    if os.path.exists(repo_path):
        print(f"⚠️ The '{repo_path}' folder already exists. Delete it or choose another location.")
        return

    print(f"📥 Cloning the repository in : {repo_path}")
    try:
        # "--" ends option parsing, so a URL starting with "-" cannot be
        # interpreted by git as a command-line option.
        subprocess.run(["git", "clone", "--", repo_url, repo_path], check=True)
    except FileNotFoundError:
        print("❌ The 'git' executable was not found. Install Git and try again.")
        return
    except subprocess.CalledProcessError as e:
        print(f"❌ git clone failed (exit code {e.returncode}).")
        return

    download_and_replace_images(repo_path)

    print(f"\n📦 Deposit ready in : {repo_path}")

# Run the interactive migration only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()