"""Git-MRI: clone a Git repository and store its remote images locally.

The script clones a repository, scans its text files (Markdown, HTML, ...)
for links that point to remote images, downloads those images into an
``images/`` directory at the repository root, rewrites the links so they
point at the local copies, and finally moves every image that no text file
references into ``images/old/``.

Provenance: added by patch 018dd4f458b89d248a2be7a3e28210d525bba6e2
("Add Git-MRI.py", author Willy, Sat, 31 May 2025 16:01:01 +0200).
"""

import os
import random
import re
import string
import subprocess
from urllib.parse import quote, urlparse

try:
    import requests
except ImportError:  # Reported explicitly by main() / download_and_replace_images().
    requests = None

TEXT_EXTENSIONS = {'.md', '.markdown', '.txt', '.rst', '.adoc', '.html'}
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg'}
LOG_FILENAME = "non_images.log"

# Markdown link or image: "[text](dest)" / "![alt](dest "title")".
#   group 1: everything up to and including "("
#   group 2: the destination (no whitespace)
#   group 3: optional quoted title plus the closing ")"
MARKDOWN_LINK_RE = re.compile(
    r'(!?\[.*?\]\()'
    r'(\S+?)'
    r'((?:\s+(?:"[^"]*"|\'[^\']*\'))?\s*\))'
)

# HTML <img> tag with an absolute http(s) source.
#   group 1: tag text up to and including the opening quote of src
#   group 2: the URL
#   group 3: the closing quote
IMG_TAG_RE = re.compile(r'(<img[^>]*?src=["\'])(https?[^"\']+)(["\'])')


def get_repo_name_from_url(repo_url):
    """Return the directory name to use for a clone of *repo_url*.

    The name is the last path component of the URL with a trailing ``.git``
    removed. Trailing slashes are ignored, and other dots in the name are
    kept (``my.project`` stays ``my.project``). Returns an empty string when
    no name can be derived.
    """
    name = os.path.basename(urlparse(repo_url).path.rstrip("/"))
    if name.endswith(".git"):
        name = name[:-len(".git")]
    return name


def is_text_file(filename):
    """Return True when *filename* has one of the TEXT_EXTENSIONS (case-insensitive)."""
    return os.path.splitext(filename)[1].lower() in TEXT_EXTENSIONS


def generate_unique_filename(extension, existing_names):
    """Return a random 20-character name plus *extension* not in *existing_names*.

    *extension* is appended verbatim, so it should include its leading dot.
    The caller is responsible for adding the result to *existing_names*.
    """
    alphabet = string.ascii_lowercase + string.digits
    while True:
        filename = ''.join(random.choices(alphabet, k=20)) + extension
        if filename not in existing_names:
            return filename


def mimetotype_to_extension(mime):
    """Map an image MIME type to a file extension (without the dot).

    Parameters such as ``; charset=utf-8`` and letter case are ignored.
    Unknown or missing types map to ``"img"``.
    """
    mapping = {
        "image/jpeg": "jpg",
        "image/png": "png",
        "image/gif": "gif",
        "image/webp": "webp",
        "image/svg+xml": "svg",
    }
    essence = (mime or "").split(";", 1)[0].strip().lower()
    return mapping.get(essence, "img")


def choose_image_extension(url, content_type):
    """Return the extension (with leading dot) for an image downloaded from *url*.

    The URL's own extension is used only when it is a known image extension;
    otherwise (no extension, or something like ``.php``) the extension is
    derived from *content_type*.
    """
    url_ext = os.path.splitext(urlparse(url).path)[1].lower()
    if url_ext in IMAGE_EXTENSIONS:
        return url_ext
    return "." + mimetotype_to_extension(content_type)


def rewrite_image_links(content, resolve):
    """Return *content* with remote image links replaced by local paths.

    *resolve* is called with each ``http``/``https`` destination found in a
    Markdown link/image or an HTML ``<img src=...>``. It must return the
    replacement path, or a falsy value to leave that link untouched.
    Markdown titles and the surrounding markup are preserved.
    """
    def substitute(match):
        url = match.group(2)
        if not url.startswith("http"):
            return match.group(0)
        new_path = resolve(url)
        if not new_path:
            return match.group(0)
        return f"{match.group(1)}{new_path}{match.group(3)}"

    content = MARKDOWN_LINK_RE.sub(substitute, content)
    return IMG_TAG_RE.sub(substitute, content)


def _iter_repo_files(repo_path, skip_dirs=()):
    """Yield ``(directory, filename)`` for files under *repo_path*.

    The ``.git`` directory (matched by exact name) and every directory listed
    in *skip_dirs* are pruned from the walk.
    """
    skipped = {os.path.normpath(path) for path in skip_dirs}
    for root, dirs, files in os.walk(repo_path):
        # Assigning to dirs[:] prunes the walk in place (top-down traversal).
        dirs[:] = [
            d for d in dirs
            if d != ".git" and os.path.normpath(os.path.join(root, d)) not in skipped
        ]
        for name in files:
            yield root, name


def download_and_replace_images(repo_path):
    """Download remote images referenced by text files and localise the links.

    Every text file under *repo_path* is scanned. Each referenced remote
    image is saved once under ``<repo_path>/images`` with a random name and
    the links are rewritten to a POSIX-style path relative to the file.
    URLs that fail or do not serve an image are written to LOG_FILENAME in
    the repository root. Finally, unreferenced images are moved to
    ``images/old`` via move_all_unused_images_to_old().

    Raises:
        RuntimeError: if the ``requests`` package is not installed.
    """
    if requests is None:
        raise RuntimeError(
            "The 'requests' package is required to download images "
            "(pip install requests)."
        )

    images_dir = os.path.join(repo_path, "images")
    os.makedirs(images_dir, exist_ok=True)

    modified_files = []
    logged_urls = []
    # Seed with existing files so a download can never overwrite one of them.
    taken_names = set(os.listdir(images_dir))
    # url -> local path (or None on failure): each URL is fetched and logged once.
    fetched = {}
    log_path = os.path.join(repo_path, LOG_FILENAME)

    def fetch(url):
        """Download *url* into images_dir; return its local path or None."""
        if url in fetched:
            return fetched[url]
        fetched[url] = None
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()

            content_type = r.headers.get("Content-Type", "")
            if not content_type.lower().startswith("image/"):
                reason = f"Type no-image ({content_type})"
                logged_urls.append((url, reason))
                print(f"⚠️ Ignored : {url} — {reason}")
                return None

            ext = choose_image_extension(url, content_type)
            filename = generate_unique_filename(ext, taken_names)
            taken_names.add(filename)
            local_path = os.path.join(images_dir, filename)

            with open(local_path, "wb") as img_file:
                img_file.write(r.content)
            print(f"✅ Image downloaded : {filename}")
        except Exception as e:
            # Deliberate per-URL boundary: one bad link (network error,
            # malformed URL, disk error) must not abort the whole migration.
            # The failure is reported and logged instead.
            reason = f"Error : {e}"
            logged_urls.append((url, reason))
            print(f"❌ Download error {url} — {e}")
            return None

        fetched[url] = local_path
        return local_path

    for root, name in _iter_repo_files(repo_path):
        if not is_text_file(name):
            continue
        full_path = os.path.join(root, name)

        # surrogateescape + newline="" round-trip undecodable bytes and the
        # file's original line endings, so only the links change on rewrite.
        with open(full_path, "r", encoding="utf-8",
                  errors="surrogateescape", newline="") as f:
            content = f.read()

        def resolve(url, base_dir=root):
            local_path = fetch(url)
            if local_path is None:
                return None
            # Links always use forward slashes, whatever the host OS.
            return os.path.relpath(local_path, base_dir).replace(os.sep, "/")

        new_content = rewrite_image_links(content, resolve)

        if new_content != content:
            with open(full_path, "w", encoding="utf-8",
                      errors="surrogateescape", newline="") as f:
                f.write(new_content)
            modified_files.append(full_path)
            print(f"✒️ Modified file : {full_path}")

    if logged_urls:
        with open(log_path, "w", encoding="utf-8") as log_file:
            for url, reason in logged_urls:
                log_file.write(f"{url} - {reason}\n")
        print(f"📝 Log file created : {log_path}")

    print(f"\n✅ {len(modified_files)} Modified file(s).")

    move_all_unused_images_to_old(repo_path)


def move_all_unused_images_to_old(repo_path):
    """Move images that no text file mentions into ``<repo_path>/images/old``.

    An image counts as referenced when its file name (literal or URL-encoded)
    appears anywhere in a text file of the repository. Images already inside
    ``images/old`` and everything under ``.git`` are left alone. Name clashes
    in the target directory are resolved with a ``_<n>`` suffix.
    """
    images_dir = os.path.join(repo_path, "images")
    old_dir = os.path.join(images_dir, "old")
    os.makedirs(old_dir, exist_ok=True)

    all_image_paths = []
    for root, name in _iter_repo_files(repo_path, skip_dirs=(old_dir,)):
        if os.path.splitext(name)[1].lower() in IMAGE_EXTENSIONS:
            full_path = os.path.join(root, name)
            rel_path = os.path.relpath(full_path, repo_path)
            all_image_paths.append((rel_path, full_path))

    referenced = set()
    for root, name in _iter_repo_files(repo_path):
        if not is_text_file(name):
            continue
        with open(os.path.join(root, name), "r", encoding="utf-8", errors="ignore") as f:
            content = f.read()
        for rel_path, _ in all_image_paths:
            if rel_path in referenced:
                continue
            # The base name is a suffix of every possible path spelling, so
            # checking it covers relative paths with either separator.
            base = os.path.basename(rel_path)
            if base in content or quote(base) in content:
                referenced.add(rel_path)

    moved = 0
    for rel_path, full_path in all_image_paths:
        if rel_path in referenced:
            continue
        filename = os.path.basename(full_path)
        stem, ext = os.path.splitext(filename)
        target_path = os.path.join(old_dir, filename)

        count = 1
        while os.path.exists(target_path):
            target_path = os.path.join(old_dir, f"{stem}_{count}{ext}")
            count += 1

        os.rename(full_path, target_path)
        print(f"↴ Image moved : {rel_path} → images/old/{os.path.basename(target_path)}")
        moved += 1

    print(f"\n↴ {moved} unreferenced image(s) moved in images/old/")


def main():
    """Interactive entry point: ask for a repository URL and a destination, then migrate."""
    print("")
    print("▄▖▘▗ ▖ ▖▄▖▄▖")
    print("▌ ▌▜▘▄▖▛▖▞▌▙▘▐ ")
    print("▙▌▌▐▖ ▌▝ ▌▌▌▟▖")
    print("")
    print("Ce programme python récupère un dépot git avec toute les images et les enregistres localement en mettant à jour les liens vers celles-ci.")
    print("License: CC BY-NC-SA 4.0")
    print("")

    if requests is None:
        print("❌ The 'requests' package is required (pip install requests).")
        return

    repo_url = input("➤ Enter the URL of the GitHub repository to migrate (*.git) : ").strip()
    destination = input("➤ Enter the absolute path of the destination folder : ").strip()

    if not os.path.isdir(destination):
        print("❌ The specified folder does not exist.")
        return

    repo_name = get_repo_name_from_url(repo_url)
    if not repo_name:
        print("❌ Could not determine the repository name from the URL.")
        return
    repo_path = os.path.join(destination, repo_name)

    if os.path.exists(repo_path):
        print(f"⚠️ The '{repo_path}' folder already exists. Delete it or choose another location.")
        return

    print(f"📥 Cloning the repository in : {repo_path}")
    try:
        # "--" stops a URL that starts with "-" from being parsed as a git option.
        subprocess.run(["git", "clone", "--", repo_url, repo_path], check=True)
    except FileNotFoundError:
        print("❌ The 'git' executable was not found. Install Git and try again.")
        return
    except subprocess.CalledProcessError as e:
        print(f"❌ git clone failed (exit code {e.returncode}).")
        return

    download_and_replace_images(repo_path)

    print(f"\n📦 Deposit ready in : {repo_path}")


if __name__ == "__main__":
    main()