#!/usr/bin/env python3
"""
Script to process videos.csv through the Trax download and transcribe pipeline.

Processes URLs from videos.csv (one per line) and runs batch processing.
"""

import subprocess
import sys
from pathlib import Path

# Command prefix shared by every pipeline step.
_CLI_PREFIX = ["uv", "run", "python", "-m", "src.cli.main"]


def _run_cli_step(cli_args: list, step_name: str) -> bool:
    """Run one CLI sub-command and report whether it exited successfully.

    Args:
        cli_args: Arguments appended after the shared CLI prefix.
        step_name: Human-readable step name used in the failure message.

    Returns:
        True when the process exits with status 0, False otherwise.

    Raises:
        OSError: Propagated from ``subprocess.run`` (for example when the
            ``uv`` executable cannot be started); the caller handles it.
    """
    result = subprocess.run(
        [*_CLI_PREFIX, *cli_args],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        # Fall back to stdout so the message is not empty when the command
        # reports its failure there instead of on stderr.
        details = result.stderr or result.stdout
        print(f"āŒ Error in {step_name} step: {details}")
        return False
    return True


def run_batch_processing(csv_file: str) -> bool:
    """Run the batch processing pipeline.

    Runs the ``batch-urls`` command on ``csv_file`` with ``--download``, then
    the ``batch`` command on ``data/media/downloads``. The second step only
    runs when the first one succeeds.

    Args:
        csv_file: Path of the file that is handed to the ``batch-urls`` step.

    Returns:
        True when both steps exit with status 0; False when a step fails or
        an exception is raised while running them.
    """
    try:
        print(f"\nšŸš€ Starting batch processing for: {csv_file}")

        # Step 1: Download and extract metadata
        print("\nšŸ“„ Step 1: Downloading videos and extracting metadata...")
        if not _run_cli_step(["batch-urls", csv_file, "--download"], "download"):
            return False
        print("āœ… Download and metadata extraction completed")

        # Step 2: Transcribe all downloaded videos
        print("\nšŸŽ¤ Step 2: Transcribing videos...")
        # Use the batch command to process all downloaded files
        if not _run_cli_step(
            ["batch", "data/media/downloads", "--v1"], "transcription"
        ):
            return False
        print("āœ… Transcription completed")
        return True
    except Exception as e:
        # Pipeline boundary: report any failure (e.g. `uv` not installed)
        # as an unsuccessful run instead of a traceback.
        print(f"āŒ Error in batch processing: {e}")
        return False


def count_urls(csv_file: str) -> int:
    """Count the number of URLs in the CSV file.

    Every line that is non-empty after stripping whitespace counts as one
    URL. The file is decoded as UTF-8 and a leading byte-order mark is
    ignored, so a BOM on its own is not counted as a URL.

    Args:
        csv_file: Path of the file to inspect.

    Returns:
        The number of non-blank lines, or 0 when the file cannot be read or
        decoded (an error message is printed in that case).
    """
    try:
        with open(csv_file, "r", encoding="utf-8-sig") as f:
            return sum(1 for line in f if line.strip())
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 content.
        print(f"āŒ Error counting URLs: {e}")
        return 0


def main(csv_file: str = "videos.csv") -> int:
    """Main function to process videos.csv.

    Args:
        csv_file: Path of the URL list to process; defaults to
            ``videos.csv`` in the current working directory.

    Returns:
        Process exit status: 0 when the pipeline completes, 1 when the file
        is missing, contains no URLs, or the pipeline fails.
    """
    print("šŸŽ¬ Trax Video Processing Pipeline")
    print("=" * 40)

    # Check if the input file exists
    if not Path(csv_file).exists():
        print(f"āŒ {csv_file} not found!")
        return 1

    # Count URLs
    url_count = count_urls(csv_file)
    if url_count == 0:
        print("āŒ No URLs found in the file!")
        return 1

    print(f"šŸ“‹ Found {url_count} URLs in {csv_file}")

    # Run batch processing
    if not run_batch_processing(csv_file):
        print("\nāŒ Pipeline failed!")
        return 1

    print("\nšŸŽ‰ Pipeline completed successfully!")
    print(f"šŸ“Š Processed {url_count} videos")
    print("šŸ“ Check the data/ directory for results")
    print("šŸ“ Transcripts available in data/exports/")
    return 0


if __name__ == "__main__":
    sys.exit(main())