feat: script that stitches files in a dir together
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
combined_output.txt
|
||||
93
stitcher.py
Executable file
93
stitcher.py
Executable file
@@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import re
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def is_binary(file_path: Path) -> bool:
    """Report whether *file_path* looks like a binary file.

    The first 1024 bytes are read in binary mode; the file is treated as
    binary when that sample contains a null byte.

    Args:
        file_path: File to inspect.

    Returns:
        True if the sample contains a null byte, or if the file cannot be
        opened or read (unreadable files are reported as binary so callers
        skip them). False otherwise, including for an empty file.
    """
    try:
        with file_path.open("rb") as f:
            chunk = f.read(1024)
    except (OSError, ValueError):
        # Best effort: a path that cannot be opened or read is reported as
        # binary. Only I/O-level failures are caught, so programming errors
        # (e.g. passing something that is not a Path) are no longer hidden.
        return True
    return b"\x00" in chunk
|
||||
|
||||
|
||||
def should_exclude(path: Path, exclude_pattern: re.Pattern) -> bool:
    """Tell whether *path* is hit by the exclusion pattern.

    The pattern is searched (not anchored) against the string form of
    *path*, so a match anywhere in the path counts.
    """
    path_text = str(path)
    hit = exclude_pattern.search(path_text)
    return hit is not None
|
||||
|
||||
|
||||
def concatenate_files(root_dir: Path, output_file: Path, exclude_regex: str):
    """Write the text of every eligible file under *root_dir* to *output_file*.

    The tree is walked recursively and paths are visited in sorted order.
    For each file kept, a ``===== FILE: <path> =====`` header line is
    written, followed by the file's contents (decoded as UTF-8 with
    undecodable bytes replaced) and two newlines.

    An entry is skipped when it is not a regular file, when it resolves to
    the output file itself, when ``should_exclude`` matches it against
    *exclude_regex*, when ``is_binary`` reports it as binary, or when
    reading it fails with an ``OSError``.

    Args:
        root_dir: Directory to search recursively.
        output_file: File to (over)write with the combined contents.
        exclude_regex: Regular expression; matching paths are skipped.

    Raises:
        re.error: If *exclude_regex* is not a valid regular expression.
    """
    exclude_pattern = re.compile(exclude_regex)
    # Resolve once: the target does not change while the tree is walked.
    resolved_output = output_file.resolve()

    with output_file.open("w", encoding="utf-8") as outfile:
        # Sorted so the combined output is reproducible; rglob itself makes
        # no promise about the order in which it yields paths.
        for path in sorted(root_dir.rglob("*")):
            if not path.is_file():
                continue

            # Never feed the output file back into itself.
            if path.resolve() == resolved_output:
                continue

            if should_exclude(path, exclude_pattern):
                continue

            if is_binary(path):
                continue

            try:
                content = path.read_text(encoding="utf-8", errors="replace")
            except OSError:
                # Unreadable file (permissions, vanished mid-walk): skip it.
                continue

            outfile.write(f"===== FILE: {path} =====\n")
            outfile.write(content)
            outfile.write("\n\n")

    print(f"Done. Output written to {output_file}")
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run the concatenation.

    Options:
        -d/--directory: root directory to search (default: current directory).
        -o/--output: output file (default: combined_output.txt).
        -e/--exclude: regex for paths to skip (default skips ``.git`` matches).

    Both paths are resolved to absolute paths before use. If the directory
    does not exist or the exclusion pattern is not a valid regular
    expression, the parser reports a usage error and exits instead of
    producing an empty output file or a traceback.
    """
    parser = argparse.ArgumentParser(
        description="Recursively concatenate file contents into a single txt file."
    )
    parser.add_argument(
        "-d",
        "--directory",
        type=str,
        default=".",
        help="Root directory to search (default: current directory)",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default="combined_output.txt",
        help="Output file (default: combined_output.txt)",
    )
    parser.add_argument(
        "-e",
        "--exclude",
        type=str,
        default=r"\.git",
        help=r"Regex pattern for exclusions (default: \.git)",
    )

    args = parser.parse_args()

    root_dir = Path(args.directory).resolve()
    if not root_dir.is_dir():
        # Without this check a bad path yields an empty output file and a
        # misleading "Done." message.
        parser.error(f"not a directory: {args.directory}")

    try:
        # Validate up front so a bad pattern is a usage error, not a traceback.
        re.compile(args.exclude)
    except re.error as exc:
        parser.error(f"invalid exclusion regex {args.exclude!r}: {exc}")

    output_file = Path(args.output).resolve()

    concatenate_files(root_dir, output_file, args.exclude)
|
||||
|
||||
|
||||
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
|
||||
Reference in New Issue
Block a user