Add Git-MRI.py
This commit is contained in:
		
							parent
							
								
									f0778fd368
								
							
						
					
					
						commit
						018dd4f458
					
				
					 1 changed files with 208 additions and 0 deletions
				
			
		
							
								
								
									
										208
									
								
								Git-MRI.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										208
									
								
								Git-MRI.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,208 @@ | |||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| import requests | ||||
| import random | ||||
| import string | ||||
| from urllib.parse import urlparse | ||||
| 
 | ||||
# File extensions (lower-case, with leading dot) that are scanned for links.
TEXT_EXTENSIONS = {
    ".md",
    ".markdown",
    ".txt",
    ".rst",
    ".adoc",
    ".html",
}

# File extensions (lower-case, with leading dot) that are treated as images.
IMAGE_EXTENSIONS = {
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".svg",
}

# Name of the report listing URLs that could not be stored as images.
LOG_FILENAME = "non_images.log"
| 
 | ||||
def get_repo_name_from_url(repo_url):
    """Return the repository name encoded in a clone URL.

    The name is the last path component of the URL with an optional
    ``.git`` suffix removed, which is the directory name ``git clone``
    itself would pick.

    Args:
        repo_url: Clone URL, e.g. ``https://host/user/repo.git``.

    Returns:
        The repository name, or an empty string when the URL has no usable
        path component.
    """
    # A trailing slash would otherwise make basename() return "".
    path = urlparse(repo_url).path.rstrip("/")
    name = os.path.basename(path)
    # Strip only the ".git" suffix: splitext() would also eat legitimate
    # dotted names such as "socket.io".
    if name.endswith(".git"):
        name = name[:-len(".git")]
    return name
| 
 | ||||
def is_text_file(filename):
    """Tell whether *filename* has one of the extensions in TEXT_EXTENSIONS.

    The comparison is case-insensitive on the extension.
    """
    _, extension = os.path.splitext(filename)
    extension = extension.lower()
    return extension in TEXT_EXTENSIONS
| 
 | ||||
def generate_unique_filename(extension, existing_names):
    """Build a random file name that is not yet in *existing_names*.

    The name is 20 random characters drawn from lowercase ASCII letters and
    digits, followed by *extension* verbatim. The caller is responsible for
    recording the returned name in *existing_names*.
    """
    alphabet = string.ascii_lowercase + string.digits
    candidate = None
    # Draw again for as long as the draw collides with a known name.
    while candidate is None or candidate in existing_names:
        stem = ''.join(random.choices(alphabet, k=20))
        candidate = f"{stem}{extension}"
    return candidate
| 
 | ||||
def mimetotype_to_extension(mime):
    """Map an image MIME type to a file extension (without the leading dot).

    Args:
        mime: A MIME type or a raw ``Content-Type`` header value; parameters
            such as ``; charset=utf-8`` and letter case are ignored.

    Returns:
        The matching extension, or ``"img"`` when the type is unknown or
        empty.
    """
    mapping = {
        "image/jpeg": "jpg",
        "image/png": "png",
        "image/gif": "gif",
        "image/webp": "webp",
        "image/svg+xml": "svg",
    }
    # Header values may carry parameters and arbitrary casing; keep only the
    # bare "type/subtype" part before looking it up.
    essence = (mime or "").split(";", 1)[0].strip().lower()
    return mapping.get(essence, "img")
| 
 | ||||
def download_and_replace_images(repo_path):
    """Download remote images referenced by text files and relink them locally.

    Every file under *repo_path* whose extension is accepted by
    ``is_text_file`` is scanned for Markdown links/images and HTML ``<img>``
    tags whose target starts with ``http``. Each target answering with an
    ``image/*`` content type is saved under ``<repo_path>/images`` with a
    random name, and the link is rewritten to a path relative to the
    referencing file. Targets that fail to download or are not images are
    left untouched and listed in ``<repo_path>/<LOG_FILENAME>``.

    Finally ``move_all_unused_images_to_old`` is run on the repository.

    Args:
        repo_path: Root directory of the cloned repository.
    """
    images_dir = os.path.join(repo_path, "images")
    os.makedirs(images_dir, exist_ok=True)

    modified_files = []
    logged_urls = []
    # Seed with the files already present so a generated name can never
    # overwrite an image that ships with the repository.
    downloaded_files = set(os.listdir(images_dir))
    # URL -> local path (None when unusable): each URL is requested once even
    # if it is referenced many times, and failures are logged once.
    fetched = {}
    log_path = os.path.join(repo_path, LOG_FILENAME)

    pattern_md = re.compile(r'(!?\[.*?\]\()(.+?)(\))')
    pattern_img_tag = re.compile(r'(<img[^>]*?src=["\'])(https?[^"\']+)(["\'])')
    # Splits a Markdown link target into leading blanks, the URL, and the
    # rest (e.g. an optional ` "title"`), so the title is not sent as URL.
    md_target = re.compile(r'(\s*)(\S+)(.*)')

    def fetch_image(url):
        """Download *url* into images_dir; return its path or None."""
        if url in fetched:
            return fetched[url]

        local_path = None
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()

            content_type = r.headers.get("Content-Type", "")
            # Drop header parameters/casing before inspecting the type.
            mime = content_type.split(";", 1)[0].strip().lower()
            if mime.startswith("image/"):
                ext = os.path.splitext(urlparse(url).path)[1]
                if not ext:
                    ext = mimetotype_to_extension(mime)
                if not ext.startswith('.'):
                    ext = '.' + ext

                filename = generate_unique_filename(ext, downloaded_files)
                downloaded_files.add(filename)
                local_path = os.path.join(images_dir, filename)

                with open(local_path, "wb") as img_file:
                    img_file.write(r.content)
                print(f"✅ Image downloaded : {filename}")
            else:
                reason = f"Type no-image ({content_type})"
                logged_urls.append((url, reason))
                print(f"⚠️ Ignored : {url} — {reason}")
        except Exception as e:
            # Deliberately broad: one bad link must not abort the migration;
            # the failure is reported and the original link is kept.
            local_path = None
            reason = f"Error : {e}"
            logged_urls.append((url, reason))
            print(f"❌ Download error {url} — {e}")

        fetched[url] = local_path
        return local_path

    def local_link(url, base_dir):
        """Return the link from *base_dir* to the local copy of *url*, or None."""
        if not url.startswith("http"):
            return None
        local_path = fetch_image(url)
        if local_path is None:
            return None
        # Links inside documents always use forward slashes, whatever the OS.
        return os.path.relpath(local_path, base_dir).replace(os.sep, "/")

    def rewrite_md(match, base_dir):
        parts = md_target.match(match.group(2))
        if parts is None:
            return match.group(0)
        lead, url, rest = parts.groups()
        new_path = local_link(url, base_dir)
        if new_path is None:
            return match.group(0)
        return f"{match.group(1)}{lead}{new_path}{rest}{match.group(3)}"

    def rewrite_img_tag(match, base_dir):
        new_path = local_link(match.group(2), base_dir)
        if new_path is None:
            return match.group(0)
        return f"{match.group(1)}{new_path}{match.group(3)}"

    for root, dirs, files in os.walk(repo_path):
        # Never touch git's own metadata; pruning in place stops the descent.
        dirs[:] = [d for d in dirs if d != ".git"]
        for file in files:
            if not is_text_file(file):
                continue
            full_path = os.path.join(root, file)

            # NOTE: undecodable bytes are dropped on read, so a rewritten
            # file is always valid UTF-8.
            with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

            base_dir = os.path.dirname(full_path)
            updated = pattern_md.sub(lambda m: rewrite_md(m, base_dir), content)
            updated = pattern_img_tag.sub(
                lambda m: rewrite_img_tag(m, base_dir), updated
            )

            # A successful replacement always changes the text, so comparing
            # is enough to know whether the file must be written back.
            if updated == content:
                continue

            with open(full_path, "w", encoding="utf-8") as f:
                f.write(updated)
            modified_files.append(full_path)
            print(f"✒️ Modified file : {full_path}")

    if logged_urls:
        with open(log_path, "w", encoding="utf-8") as log_file:
            log_file.writelines(
                f"{url} - {reason}\n" for url, reason in logged_urls
            )
        print(f"📝 Log file created : {log_path}")

    print(f"\n✅ {len(modified_files)} Modified file(s).")

    move_all_unused_images_to_old(repo_path)
| 
 | ||||
def move_all_unused_images_to_old(repo_path):
    """Move images that no text file mentions into ``images/old``.

    An image (extension in ``IMAGE_EXTENSIONS``) counts as referenced when
    its repository-relative path or its bare file name occurs anywhere in a
    file accepted by ``is_text_file``. Unreferenced images are renamed into
    ``<repo_path>/images/old``; a numeric suffix is appended when the target
    name is already taken. The ``.git`` directory is never inspected, and
    files already inside ``images/old`` are left where they are.

    Args:
        repo_path: Root directory of the repository working tree.
    """
    images_dir = os.path.join(repo_path, "images")
    old_dir = os.path.join(images_dir, "old")
    os.makedirs(old_dir, exist_ok=True)
    archive_root = os.path.normpath(images_dir)

    def walk_worktree(skip_archive):
        """Yield (root, files) pairs, pruning ``.git`` (and optionally old_dir)."""
        for root, dirs, files in os.walk(repo_path):
            # Match the directory name exactly: a substring test on the path
            # would also skip ".github" or a checkout located in "x.git".
            skipped = {".git"}
            if skip_archive and os.path.normpath(root) == archive_root:
                skipped.add("old")
            dirs[:] = [d for d in dirs if d not in skipped]
            yield root, files

    # Already archived images are excluded, otherwise a second run would
    # "move" them onto themselves and rename them with a _1 suffix.
    all_image_paths = []
    for root, files in walk_worktree(skip_archive=True):
        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if ext in IMAGE_EXTENSIONS:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, repo_path)
                all_image_paths.append((rel_path, full_path))

    referenced = set()
    for root, files in walk_worktree(skip_archive=False):
        for file in files:
            if not is_text_file(file):
                continue
            full_path = os.path.join(root, file)
            with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
            for rel_path, _ in all_image_paths:
                if rel_path in referenced:
                    continue
                # Documents use "/" even where the OS separator differs.
                candidates = (
                    rel_path,
                    rel_path.replace(os.sep, "/"),
                    os.path.basename(rel_path),
                )
                if any(candidate in content for candidate in candidates):
                    referenced.add(rel_path)

    moved = 0
    for rel_path, full_path in all_image_paths:
        if rel_path in referenced:
            continue
        filename = os.path.basename(full_path)
        name, ext = os.path.splitext(filename)
        target_path = os.path.join(old_dir, filename)

        # Pick the first free "<name>_<n><ext>" when the name is taken.
        count = 1
        while os.path.exists(target_path):
            target_path = os.path.join(old_dir, f"{name}_{count}{ext}")
            count += 1

        os.rename(full_path, target_path)
        print(f"↴ Image moved : {rel_path} → images/old/{os.path.basename(target_path)}")
        moved += 1

    print(f"\n↴ {moved} unreferenced image(s) moved in images/old/")
| 
 | ||||
def main():
    """Interactive entry point: clone a repository and localize its images.

    Prompts for a clone URL and an existing destination folder, clones the
    repository into ``<destination>/<repo name>``, then runs
    ``download_and_replace_images`` on the clone. Every error is reported on
    stdout and ends the function without raising.
    """
    print("")
    print("▄▖▘▗   ▖  ▖▄▖▄▖")
    print("▌ ▌▜▘▄▖▛▖▞▌▙▘▐ ")
    print("▙▌▌▐▖  ▌▝ ▌▌▌▟▖")
    print("")
    print("Ce programme python récupère un dépot git avec toute les images et les enregistres localement en mettant à jour les liens vers celles-ci.")
    print("License: CC BY-NC-SA 4.0")
    print("")

    repo_url = input("➤ Enter the URL of the GitHub repository to migrate (*.git) : ").strip()
    destination = input("➤ Enter the absolute path of the destination folder : ").strip()

    if not os.path.isdir(destination):
        print("❌ The specified folder does not exist.")
        return

    repo_name = get_repo_name_from_url(repo_url)
    if not repo_name:
        # Without a name the target would be the destination folder itself,
        # which leads to a misleading "already exists" message.
        print("❌ Unable to determine the repository name from the URL.")
        return

    repo_path = os.path.join(destination, repo_name)

    if os.path.exists(repo_path):
        print(f"⚠️ The '{repo_path}' folder already exists. Delete it or choose another location.")
        return

    print(f"📥 Cloning the repository in : {repo_path}")
    try:
        # "--" ends option parsing so a URL starting with "-" cannot be
        # interpreted by git as a command-line option.
        subprocess.run(["git", "clone", "--", repo_url, repo_path], check=True)
    except FileNotFoundError:
        print("❌ The 'git' executable was not found. Install git and try again.")
        return
    except subprocess.CalledProcessError as e:
        print(f"❌ Cloning failed (git exit code {e.returncode}).")
        return

    download_and_replace_images(repo_path)

    print(f"\n📦 Deposit ready in : {repo_path}")
| 
 | ||||
# Start the interactive migration only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue