208 lines
7.5 KiB
Python
208 lines
7.5 KiB
Python
import os
|
|
import re
|
|
import subprocess
|
|
import requests
|
|
import random
|
|
import string
|
|
from urllib.parse import urlparse
|
|
|
|
# File extensions (lower-case, with leading dot) that is_text_file() accepts;
# only files with these extensions are scanned for image links.
TEXT_EXTENSIONS = {'.md', '.markdown', '.txt', '.rst', '.adoc', '.html'}
|
|
# File extensions (lower-case, with leading dot) treated as images when
# looking for unreferenced images to archive.
IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg'}
|
|
# Name of the log file, created at the repository root, listing every URL
# that was skipped or failed to download together with the reason.
LOG_FILENAME = "non_images.log"
|
|
|
|
def get_repo_name_from_url(repo_url):
    """Derive the local directory name of a repository from its clone URL.

    The name is the last path segment of the URL, ignoring trailing slashes,
    with a trailing ``.git`` removed.  Only the ``.git`` suffix is stripped,
    so dotted repository names such as ``socket.io`` are kept intact.

    Args:
        repo_url: Clone URL of the repository (e.g. ``https://host/u/r.git``).

    Returns:
        The repository name, or an empty string when the URL has no usable
        path segment.
    """
    # A trailing slash would otherwise make basename() return ''.
    path = urlparse(repo_url).path.rstrip("/")
    name = os.path.basename(path)
    if name.endswith(".git"):
        name = name[:-len(".git")]
    return name
|
|
|
|
def is_text_file(filename):
    """Tell whether *filename* carries one of the ``TEXT_EXTENSIONS``.

    The extension is compared case-insensitively.

    Args:
        filename: File name or path to classify.

    Returns:
        True when the lower-cased extension is in ``TEXT_EXTENSIONS``.
    """
    _, suffix = os.path.splitext(filename)
    return suffix.lower() in TEXT_EXTENSIONS
|
|
|
|
def generate_unique_filename(extension, existing_names):
    """Build a random file name that is not already in *existing_names*.

    The name is 20 random characters drawn from lower-case ASCII letters and
    digits, followed by *extension* verbatim.  Candidates are redrawn until
    one is absent from *existing_names*; the collection itself is not
    modified.

    Args:
        extension: Suffix appended as-is to the random stem (e.g. ``".png"``).
        existing_names: Container of names that must not be reused.

    Returns:
        The newly generated file name.
    """
    alphabet = string.ascii_lowercase + string.digits
    candidate = None
    while candidate is None or candidate in existing_names:
        stem = ''.join(random.choices(alphabet, k=20))
        candidate = f"{stem}{extension}"
    return candidate
|
|
|
|
def mimetotype_to_extension(mime):
    """Map an image MIME type to a file extension (without leading dot).

    The value may be a raw ``Content-Type`` header: parameters after ``;``
    (e.g. ``; charset=utf-8``), surrounding whitespace and letter case are
    ignored before the lookup.

    Args:
        mime: MIME type or Content-Type header value; ``None`` or an empty
            string is treated as unknown.

    Returns:
        The matching extension (``"jpg"``, ``"png"``, ``"gif"``, ``"webp"``
        or ``"svg"``), or the generic fallback ``"img"`` for anything else.
    """
    mapping = {
        "image/jpeg": "jpg",
        "image/png": "png",
        "image/gif": "gif",
        "image/webp": "webp",
        "image/svg+xml": "svg",
    }
    # Keep only the media type: "image/svg+xml; charset=utf-8" -> "image/svg+xml".
    media_type = (mime or "").split(";", 1)[0].strip().lower()
    return mapping.get(media_type, "img")
|
|
|
|
def download_and_replace_images(repo_path):
    """Download remote images referenced by text files and relink them locally.

    Every text file under *repo_path* (as decided by ``is_text_file``) is
    scanned for Markdown links/images and HTML ``<img src=...>`` tags whose
    target starts with ``http``.  Each target is fetched; when the response
    is an image it is stored under ``<repo_path>/images`` with a random
    unique name and the reference is rewritten to a relative path.  Targets
    that are not images, or that fail to download, are left untouched and
    recorded in ``LOG_FILENAME`` at the repository root.

    Each distinct URL is fetched at most once: later references reuse the
    same local file (or the same failure) instead of downloading again.

    Finally, ``move_all_unused_images_to_old`` is called on the repository.

    Args:
        repo_path: Path of the cloned repository to process in place.
    """
    images_dir = os.path.join(repo_path, "images")
    os.makedirs(images_dir, exist_ok=True)

    modified_files = []
    logged_urls = []
    downloaded_files = set()
    # URL -> absolute path of its local copy, or None when it was rejected.
    fetched = {}
    log_path = os.path.join(repo_path, LOG_FILENAME)

    # Group 1 = text before the target, group 2 = target, group 3 = closer.
    pattern_md = re.compile(r'(!?\[.*?\]\()(.+?)(\))')
    pattern_img_tag = re.compile(r'(<img[^>]*?src=["\'])(https?[^"\']+)(["\'])')

    def fetch_image(url):
        """Return the local path of the image at *url*, or None if unusable."""
        if url in fetched:
            return fetched[url]

        local_path = None
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()

            content_type = r.headers.get("Content-Type", "")
            if not content_type.startswith("image/"):
                reason = f"Type no-image ({content_type})"
                logged_urls.append((url, reason))
                print(f"⚠️ Ignored : {url} — {reason}")
            else:
                ext = os.path.splitext(urlparse(url).path)[1]
                if not ext:
                    # Drop header parameters such as "; charset=utf-8" so the
                    # MIME lookup sees the bare media type.
                    media_type = content_type.split(";", 1)[0].strip()
                    ext = mimetotype_to_extension(media_type)
                if not ext.startswith('.'):
                    ext = '.' + ext

                filename = generate_unique_filename(ext, downloaded_files)
                downloaded_files.add(filename)
                target = os.path.join(images_dir, filename)

                with open(target, "wb") as img_file:
                    img_file.write(r.content)
                print(f"✅ Image downloaded : {filename}")
                local_path = target
        except Exception as e:
            # Deliberately broad: one bad link must not abort the whole run.
            reason = f"Error : {e}"
            logged_urls.append((url, reason))
            print(f"❌ Download error {url} — {e}")

        fetched[url] = local_path
        return local_path

    def relink(content, base_dir):
        """Return *content* with downloadable image URLs made relative to *base_dir*."""

        def replace(match):
            url = match.group(2)
            if not url.startswith("http"):
                return match.group(0)
            local_path = fetch_image(url)
            if local_path is None:
                return match.group(0)
            # Links in documents always use forward slashes, even on Windows.
            rel_path = os.path.relpath(local_path, base_dir).replace(os.sep, "/")
            return f"{match.group(1)}{rel_path}{match.group(3)}"

        content = pattern_md.sub(replace, content)
        return pattern_img_tag.sub(replace, content)

    for root, dirs, files in os.walk(repo_path):
        # Never descend into git's own metadata directory.
        dirs[:] = [d for d in dirs if d != ".git"]

        for file in files:
            if not is_text_file(file):
                continue

            full_path = os.path.join(root, file)
            with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()

            new_content = relink(content, os.path.dirname(full_path))

            # A successful replacement always changes the text, so comparing
            # the two strings tells whether the file needs rewriting.
            if new_content != content:
                with open(full_path, "w", encoding="utf-8") as f:
                    f.write(new_content)
                modified_files.append(full_path)
                print(f"✒️ Modified file : {full_path}")

    if logged_urls:
        with open(log_path, "w", encoding="utf-8") as log_file:
            log_file.writelines(
                f"{url} - {reason}\n" for url, reason in logged_urls
            )
        print(f"📝 Log file created : {log_path}")

    print(f"\n✅ {len(modified_files)} Modified file(s).")

    move_all_unused_images_to_old(repo_path)
|
|
|
|
def move_all_unused_images_to_old(repo_path):
    """Archive images that no text file mentions into ``images/old``.

    All files under *repo_path* whose extension is in ``IMAGE_EXTENSIONS``
    are collected, except those inside a ``.git`` directory or already in
    ``images/old``.  An image counts as referenced when its file name occurs
    anywhere in the content of a text file (as decided by ``is_text_file``).
    Every unreferenced image is moved to ``<repo_path>/images/old``; on a
    name collision a numeric suffix (``_1``, ``_2``, ...) is appended.

    Args:
        repo_path: Path of the repository to clean up in place.
    """
    images_dir = os.path.join(repo_path, "images")
    old_dir = os.path.join(images_dir, "old")
    os.makedirs(old_dir, exist_ok=True)
    archive_dir = os.path.normpath(old_dir)

    all_image_paths = []
    for root, dirs, files in os.walk(repo_path):
        # Prune git metadata (exact name, so ".github" is still scanned) and
        # the archive itself, whose content must not be moved onto itself.
        dirs[:] = [
            d for d in dirs
            if d != ".git"
            and os.path.normpath(os.path.join(root, d)) != archive_dir
        ]
        for file in files:
            ext = os.path.splitext(file)[1].lower()
            if ext in IMAGE_EXTENSIONS:
                full_path = os.path.join(root, file)
                rel_path = os.path.relpath(full_path, repo_path)
                all_image_paths.append((rel_path, full_path))

    # Images not yet seen in any text file, keyed by relative path.  The file
    # name is a suffix of the relative path, so testing the file name alone
    # also covers references that spell out the whole relative path.
    unreferenced = {
        rel_path: os.path.basename(rel_path) for rel_path, _ in all_image_paths
    }

    for root, dirs, files in os.walk(repo_path):
        if not unreferenced:
            break  # every image is already known to be referenced
        dirs[:] = [d for d in dirs if d != ".git"]
        for file in files:
            if not is_text_file(file):
                continue
            full_path = os.path.join(root, file)
            with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
            found = [
                rel_path
                for rel_path, basename in unreferenced.items()
                if basename in content
            ]
            for rel_path in found:
                del unreferenced[rel_path]

    moved = 0
    for rel_path, full_path in all_image_paths:
        if rel_path not in unreferenced:
            continue

        filename = os.path.basename(full_path)
        name, ext = os.path.splitext(filename)
        target_path = os.path.join(old_dir, filename)

        # Pick the first free "<name>_<n><ext>" when the plain name is taken.
        count = 1
        while os.path.exists(target_path):
            target_path = os.path.join(old_dir, f"{name}_{count}{ext}")
            count += 1

        os.rename(full_path, target_path)
        print(f"↴ Image moved : {rel_path} → images/old/{os.path.basename(target_path)}")
        moved += 1

    print(f"\n↴ {moved} unreferenced image(s) moved in images/old/")
|
|
|
|
def main():
    """Interactive entry point: clone a repository and localize its images.

    Prompts for the repository URL and an existing destination folder,
    clones the repository into ``<destination>/<repo name>`` and runs
    ``download_and_replace_images`` on the clone.

    The function returns early, with a message, when the destination folder
    does not exist, when no repository name can be derived from the URL, or
    when the target folder already exists.

    Raises:
        SystemExit: With status 1 when ``git`` is not installed or the clone
            command fails.
    """
    print("")
    print("▄▖▘▗ ▖ ▖▄▖▄▖")
    print("▌ ▌▜▘▄▖▛▖▞▌▙▘▐ ")
    print("▙▌▌▐▖ ▌▝ ▌▌▌▟▖")
    print("")
    print("Ce programme python récupère un dépot git avec toute les images et les enregistres localement en mettant à jour les liens vers celles-ci.")
    print("License: CC BY-NC-SA 4.0")
    print("")

    repo_url = input("➤ Enter the URL of the GitHub repository to migrate (*.git) : ").strip()
    destination = input("➤ Enter the absolute path of the destination folder : ").strip()

    if not os.path.isdir(destination):
        print("❌ The specified folder does not exist.")
        return

    repo_name = get_repo_name_from_url(repo_url)
    if not repo_name:
        # An empty name would make repo_path equal to the destination itself
        # and trigger a misleading "already exists" message below.
        print("❌ Could not determine the repository name from the URL.")
        return

    repo_path = os.path.join(destination, repo_name)

    if os.path.exists(repo_path):
        print(f"⚠️ The '{repo_path}' folder already exists. Delete it or choose another location.")
        return

    print(f"📥 Cloning the repository in : {repo_path}")
    try:
        # "--" ends option parsing so a URL starting with "-" cannot be
        # interpreted as a git option.
        subprocess.run(["git", "clone", "--", repo_url, repo_path], check=True)
    except FileNotFoundError:
        print("❌ The 'git' executable was not found. Install git and try again.")
        raise SystemExit(1)
    except subprocess.CalledProcessError as e:
        print(f"❌ git clone failed (exit code {e.returncode}).")
        raise SystemExit(1)

    download_and_replace_images(repo_path)

    print(f"\n📦 Deposit ready in : {repo_path}")


# Run the interactive workflow only when executed as a script.
if __name__ == "__main__":
    main()
|