"""Clone a Git repository and store the remote images it references locally.

Every text document of the clone is scanned for Markdown links/images and HTML
``<img>`` tags whose target is an ``http(s)`` URL.  Each URL that actually
serves an image is downloaded into ``<repo>/images`` and the reference is
rewritten to a relative path.  Images that no text document references
afterwards are moved to ``<repo>/images/old``.
"""

import os
import random
import re
import string
import subprocess
from urllib.parse import urlparse

try:
    import requests
except ImportError:
    # Reported with an actionable message where the package is first needed,
    # instead of failing with a traceback at import time.
    requests = None

TEXT_EXTENSIONS = {'.md', '.markdown', '.txt', '.rst', '.adoc', '.html'}
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg'}
LOG_FILENAME = "no_images.log"

# Groups of both patterns: (text before the target, target, closing delimiter).
# The Markdown pattern deliberately accepts plain links too ("!" is optional):
# a link whose target turns out to be an image is localized as well.
_MD_LINK_PATTERN = re.compile(r'(!?\[.*?\]\()(.+?)(\))')
_IMG_TAG_PATTERN = re.compile(
    r'(<img[^>]*?src=["\'])(https?[^"\']+)(["\'])', re.IGNORECASE
)

_MIME_EXTENSIONS = {
    "image/jpeg": "jpg",
    "image/png": "png",
    "image/gif": "gif",
    "image/webp": "webp",
    "image/svg+xml": "svg",
}


def get_repo_name_from_url(repo_url):
    """Return the directory name ``git clone`` would use for *repo_url*.

    Trailing slashes are ignored and only a ``.git`` suffix is removed, so a
    dotted name such as ``user.github.io`` is kept intact.
    """
    name = os.path.basename(urlparse(repo_url).path.rstrip("/"))
    if name.endswith(".git"):
        name = name[:-len(".git")]
    return name


def is_text_file(filename):
    """Return True when *filename* has one of the ``TEXT_EXTENSIONS``."""
    return os.path.splitext(filename)[1].lower() in TEXT_EXTENSIONS


def generate_unique_filename(extension, existing_names):
    """Return a random 20-character name plus *extension* not in *existing_names*.

    The caller is responsible for adding the returned name to *existing_names*.
    """
    alphabet = string.ascii_lowercase + string.digits
    while True:
        filename = f"{''.join(random.choices(alphabet, k=20))}{extension}"
        if filename not in existing_names:
            return filename


def mimetotype_to_extension(mime):
    """Map a ``Content-Type`` value to a file extension without the dot.

    Parameters such as ``; charset=utf-8`` and letter case are ignored.
    Unknown types map to ``"img"``.
    """
    media_type = mime.split(";", 1)[0].strip().lower()
    return _MIME_EXTENSIONS.get(media_type, "img")


def _rewrite_links(content, document_path, fetch):
    """Return *content* with downloadable remote images pointing at local copies.

    *fetch* is called with a URL and returns the path of the saved file, or
    ``None`` when the reference must be left untouched.
    """
    document_dir = os.path.dirname(document_path)

    def substitute(match, has_title):
        prefix, target, suffix = match.groups()
        title = ""
        if has_title:
            # Markdown allows ![alt](url "title"): fetch the URL, keep the title.
            target, space, rest = target.partition(" ")
            title = space + rest
        if not target.startswith("http"):
            return match.group(0)
        local_path = fetch(target)
        if local_path is None:
            return match.group(0)
        # Links in documents always use "/", whatever the OS separator is.
        link = os.path.relpath(local_path, document_dir).replace(os.sep, "/")
        return f"{prefix}{link}{title}{suffix}"

    content = _MD_LINK_PATTERN.sub(lambda m: substitute(m, True), content)
    return _IMG_TAG_PATTERN.sub(lambda m: substitute(m, False), content)


def download_and_replace_images(repo_path):
    """Download remote images referenced under *repo_path* and relink them.

    Images are stored in ``<repo_path>/images``.  URLs that fail to download
    or do not serve an image are listed in ``<repo_path>/no_images.log``.
    Finally, images no document references are moved to ``images/old``.

    Documents are read as UTF-8 with undecodable bytes dropped, so a rewritten
    file no longer contains such bytes.

    Raises:
        RuntimeError: if the ``requests`` package is not installed.
    """
    if requests is None:
        raise RuntimeError("The 'requests' package is required to download images.")

    images_dir = os.path.join(repo_path, "images")
    os.makedirs(images_dir, exist_ok=True)
    log_path = os.path.join(repo_path, LOG_FILENAME)

    modified_files = []
    logged_urls = []
    # Seeded with what is already on disk so an existing image is never overwritten.
    used_names = set(os.listdir(images_dir))
    # URL -> saved path, so an image referenced several times is fetched once.
    saved_by_url = {}

    def fetch(url):
        """Save *url* into ``images_dir``; return its path, or None (logged)."""
        if url in saved_by_url:
            return saved_by_url[url]
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            content_type = r.headers.get("Content-Type", "")
            if not content_type.strip().lower().startswith("image/"):
                reason = f"Type no-image ({content_type})"
                logged_urls.append((url, reason))
                print(f"⚠️ Ignored : {url} — {reason}")
                return None
            # Trust the URL's extension only when it is a known image type
            # (e.g. not ".php"); otherwise derive it from the Content-Type.
            ext = os.path.splitext(urlparse(url).path)[1].lower()
            if ext not in IMAGE_EXTENSIONS:
                ext = "." + mimetotype_to_extension(content_type)
            filename = generate_unique_filename(ext, used_names)
            used_names.add(filename)
            local_path = os.path.join(images_dir, filename)
            with open(local_path, "wb") as img_file:
                img_file.write(r.content)
        except Exception as e:
            # Deliberately broad: one unreachable or malformed URL must not
            # abort the run and leave the clone half rewritten.
            reason = f"Error : {e}"
            logged_urls.append((url, reason))
            print(f"❌ Download error {url} — {e}")
            return None
        print(f"✅ Image downloaded : {filename}")
        saved_by_url[url] = local_path
        return local_path

    for root, dirs, files in os.walk(repo_path):
        # Prune in place so os.walk never descends into Git metadata.
        dirs[:] = [d for d in dirs if d != ".git"]
        for file in files:
            if not is_text_file(file):
                continue
            full_path = os.path.join(root, file)
            with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

            new_content = _rewrite_links(content, full_path, fetch)
            if new_content != content:
                with open(full_path, "w", encoding="utf-8") as f:
                    f.write(new_content)
                modified_files.append(full_path)
                print(f"✒️ Modified file : {full_path}")

    if logged_urls:
        with open(log_path, "w", encoding="utf-8") as log_file:
            log_file.writelines(f"{url} - {reason}\n" for url, reason in logged_urls)
        print(f"📝 Log file created : {log_path}")

    print(f"\n✅ {len(modified_files)} Modified file(s).")
    move_all_unused_images_to_old(repo_path)


def move_all_unused_images_to_old(repo_path):
    """Move images that no text document mentions to ``<repo_path>/images/old``.

    An image counts as referenced when its path relative to *repo_path*, or
    just its file name, occurs anywhere in a text document.  The ``.git``
    directory and ``images/old`` itself are not scanned for images.  A numeric
    suffix is added when the target name is already taken.
    """
    images_dir = os.path.join(repo_path, "images")
    old_dir = os.path.join(images_dir, "old")
    os.makedirs(old_dir, exist_ok=True)
    old_dir_norm = os.path.normpath(old_dir)

    all_image_paths = []
    for root, dirs, files in os.walk(repo_path):
        # Match directory names exactly: a substring test on the whole path
        # would also skip e.g. ".github" or a clone located in "user.github.io".
        # Skipping images/old keeps already archived files from being renamed.
        dirs[:] = [
            d for d in dirs
            if d != ".git"
            and os.path.normpath(os.path.join(root, d)) != old_dir_norm
        ]
        for file in files:
            if os.path.splitext(file)[1].lower() in IMAGE_EXTENSIONS:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, repo_path)
                all_image_paths.append((rel_path, full_path))

    referenced = set()
    for root, dirs, files in os.walk(repo_path):
        dirs[:] = [d for d in dirs if d != ".git"]
        for file in files:
            if not is_text_file(file):
                continue
            full_path = os.path.join(root, file)
            with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
            for rel_path, _ in all_image_paths:
                if rel_path in referenced:
                    continue
                if rel_path in content or os.path.basename(rel_path) in content:
                    referenced.add(rel_path)

    moved = 0
    for rel_path, full_path in all_image_paths:
        if rel_path in referenced:
            continue
        filename = os.path.basename(full_path)
        name, ext = os.path.splitext(filename)
        target_path = os.path.join(old_dir, filename)
        count = 1
        while os.path.exists(target_path):
            target_path = os.path.join(old_dir, f"{name}_{count}{ext}")
            count += 1
        os.rename(full_path, target_path)
        print(f"↴ Image moved : {rel_path} → images/old/{os.path.basename(target_path)}")
        moved += 1

    print(f"\n↴ {moved} unreferenced image(s) moved in images/old/")


def main():
    """Ask for a repository URL and a destination, clone it, localize its images.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` fails.
    """
    print("")
    print("▄▖▘▗ ▖ ▖▄▖▄▖")
    print("▌ ▌▜▘▄▖▛▖▞▌▙▘▐ ")
    print("▙▌▌▐▖ ▌▝ ▌▌▌▟▖")
    print("")
    print("Ce programme python récupère un dépot git avec toute les images et les enregistres localement en mettant à jour les liens vers celles-ci.")
    print("License: CC BY-NC-SA 4.0")
    print("")

    if requests is None:
        # Checked before cloning so nothing is left half done.
        print("❌ The 'requests' package is required : pip install requests")
        return

    repo_url = input("➤ Enter the URL of the GitHub repository to migrate (*.git) : ").strip()
    destination = input("➤ Enter the absolute path of the destination folder : ").strip()

    if not os.path.isdir(destination):
        print("❌ The specified folder does not exist.")
        return

    repo_name = get_repo_name_from_url(repo_url)
    repo_path = os.path.join(destination, repo_name)

    if os.path.exists(repo_path):
        print(f"⚠️ The '{repo_path}' folder already exists. Delete it or choose another location.")
        return

    print(f"📥 Cloning the repository in : {repo_path}")
    subprocess.run(["git", "clone", repo_url, repo_path], check=True)

    download_and_replace_images(repo_path)

    print(f"\n📦 Deposit ready in : {repo_path}")


if __name__ == "__main__":
    main()