#!/usr/bin/env python3
"""Recursively concatenate the text files under a directory into one file.

Every regular file below the root directory is appended to the output file
behind a ``===== FILE: <path> =====`` header line. Files that look binary,
files whose path matches the exclusion regex, and the output file itself are
skipped.
"""

# NOTE: recovered from a whitespace-collapsed git patch that adds this script
# as stitcher.py (mode 100755) together with a .gitignore whose single entry
# is the default output name, combined_output.txt.

import os
import re
import argparse
from pathlib import Path


def is_binary(file_path: Path) -> bool:
    """Return True when *file_path* looks like a binary file.

    Only the first 1024 bytes are inspected; a null byte in that prefix marks
    the file as binary. A file that cannot be opened or read is also reported
    as binary so that callers skip it instead of failing.
    """
    try:
        with file_path.open("rb") as f:
            chunk = f.read(1024)
    except OSError:
        # Unreadable (permissions, vanished file, ...): treat as "skip me".
        return True
    return b"\x00" in chunk


def should_exclude(path: Path, exclude_pattern: re.Pattern) -> bool:
    """Return True when *exclude_pattern* matches anywhere in ``str(path)``."""
    return bool(exclude_pattern.search(str(path)))


def concatenate_files(root_dir: Path, output_file: Path, exclude_regex: str) -> None:
    """Write the contents of every text file under *root_dir* to *output_file*.

    Args:
        root_dir: Directory that is searched recursively.
        output_file: File that receives the combined text. It is truncated
            first and is never included in its own output.
        exclude_regex: Regular expression searched in each file's path
            *relative to root_dir*; matching files are skipped.

    Raises:
        re.error: If *exclude_regex* is not a valid regular expression. The
            pattern is compiled before the output file is opened, so an
            invalid pattern does not truncate an existing output file.
        OSError: If *output_file* cannot be opened for writing.
    """
    exclude_pattern = re.compile(exclude_regex)

    with output_file.open("w", encoding="utf-8") as outfile:
        # Resolve once, after the file exists, instead of once per candidate.
        output_resolved = output_file.resolve()

        # Sorted so the combined file is reproducible across runs and
        # filesystems (directory iteration order is otherwise unspecified).
        for path in sorted(root_dir.rglob("*")):
            if not path.is_file():
                continue

            if path.resolve() == output_resolved:
                continue

            # Match against the path below root_dir only. Matching the full
            # path would let an ancestor directory name (for example a
            # checkout located in ".../proj.git/") exclude every file.
            if should_exclude(path.relative_to(root_dir), exclude_pattern):
                continue

            if is_binary(path):
                continue

            try:
                content = path.read_text(encoding="utf-8", errors="replace")
            except OSError:
                # Best effort: a file that became unreadable is skipped.
                continue

            outfile.write(f"===== FILE: {path} =====\n")
            outfile.write(content)
            outfile.write("\n\n")

    print(f"Done. Output written to {output_file}")


def main(argv=None):
    """Parse command-line options and run the concatenation.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Exits through ``parser.error`` (usage message, exit status 2) when the
    root directory does not exist or the exclusion pattern is not a valid
    regular expression.
    """
    parser = argparse.ArgumentParser(
        description="Recursively concatenate file contents into a single txt file."
    )
    parser.add_argument(
        "-d",
        "--directory",
        type=str,
        default=".",
        help="Root directory to search (default: current directory)",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default="combined_output.txt",
        help="Output file (default: combined_output.txt)",
    )
    parser.add_argument(
        "-e",
        "--exclude",
        type=str,
        default=r"\.git",
        help=r"Regex matched against paths relative to the root (default: \.git)",
    )

    args = parser.parse_args(argv)

    root_dir = Path(args.directory).resolve()
    output_file = Path(args.output).resolve()

    # Fail loudly instead of silently producing an empty output file.
    if not root_dir.is_dir():
        parser.error(f"not a directory: {root_dir}")

    # Validate up front so a bad pattern gives a usage error, not a traceback.
    try:
        re.compile(args.exclude)
    except re.error as err:
        parser.error(f"invalid exclude pattern {args.exclude!r}: {err}")

    concatenate_files(root_dir, output_file, args.exclude)


if __name__ == "__main__":
    main()