From 1bd82158fc0b402112861ef25f2d189f3f3c8286 Mon Sep 17 00:00:00 2001 From: enias Date: Sun, 24 Aug 2025 22:15:38 -0400 Subject: [PATCH] Initial commit: YouTube Summarizer project setup - Created project structure following new standardized layout - Added FastAPI-based main application - Configured requirements with YouTube and AI integrations - Added comprehensive README documentation - Set up environment configuration template --- .env.example | 40 +++++++++++ .gitignore | 53 +++++++++++++++ README.md | 173 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 44 ++++++++++++ src/main.py | 106 +++++++++++++++++++++++++++++ 5 files changed, 416 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 README.md create mode 100644 requirements.txt create mode 100644 src/main.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..ea8a9b8 --- /dev/null +++ b/.env.example @@ -0,0 +1,40 @@ +# YouTube Summarizer Configuration + +# Server Configuration +APP_HOST=0.0.0.0 +APP_PORT=8082 +DEBUG=False + +# YouTube API (optional but recommended for metadata) +YOUTUBE_API_KEY=your_youtube_api_key_here + +# AI Service Configuration (choose one or multiple) +# OpenAI +OPENAI_API_KEY=your_openai_api_key_here +OPENAI_MODEL=gpt-4o-mini + +# Anthropic Claude +ANTHROPIC_API_KEY=your_anthropic_api_key_here +ANTHROPIC_MODEL=claude-3-haiku-20240307 + +# DeepSeek (cost-effective option) +DEEPSEEK_API_KEY=your_deepseek_api_key_here +DEEPSEEK_MODEL=deepseek-chat + +# Database +DATABASE_URL=sqlite:///./data/youtube_summarizer.db + +# Session Configuration +SECRET_KEY=your-secret-key-here-change-in-production + +# Rate Limiting +RATE_LIMIT_PER_MINUTE=30 +MAX_VIDEO_LENGTH_MINUTES=180 + +# Cache Configuration +ENABLE_CACHE=True +CACHE_TTL_HOURS=24 + +# Logging +LOG_LEVEL=INFO +LOG_FILE=logs/app.log \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..235b68f --- /dev/null 
+++ b/.gitignore @@ -0,0 +1,53 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +.venv + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Environment +.env +.env.local +.env.*.local + +# Testing +.coverage +htmlcov/ +.pytest_cache/ +.tox/ + +# Build +build/ +dist/ +*.egg-info/ +.eggs/ + +# Logs +*.log +logs/ + +# Database +*.db +*.sqlite +*.sqlite3 + +# Media/Data +data/ +downloads/ +cache/ + +# Task Master +.taskmaster/reports/ +.taskmaster/backups/ \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..77a8911 --- /dev/null +++ b/README.md @@ -0,0 +1,173 @@ +# YouTube Summarizer Web Application + +An AI-powered web application that automatically extracts, transcribes, and summarizes YouTube videos, providing intelligent insights and key takeaways. + +## 🎯 Features + +- **Video Transcript Extraction**: Automatically fetch transcripts from YouTube videos +- **AI-Powered Summarization**: Generate concise summaries using multiple AI models +- **Multi-Model Support**: Choose between OpenAI GPT, Anthropic Claude, or DeepSeek +- **Key Points Extraction**: Identify and highlight main topics and insights +- **Chapter Generation**: Automatically create timestamped chapters +- **Export Options**: Save summaries as Markdown, PDF, or plain text +- **Caching System**: Reduce API calls with intelligent caching +- **Rate Limiting**: Built-in protection against API overuse + +## 🏗️ Architecture + +``` +[Web Interface] → [FastAPI Backend] → [YouTube API/Transcript API] + ↓ +[AI Service] ← [Summary Generation] ← [Transcript Processing] + ↓ +[Database Cache] → [Summary Storage] → [Export Service] +``` + +## 🚀 Quick Start + +### Prerequisites + +- Python 3.11+ +- YouTube API Key (optional but recommended) +- At least one AI service API key (OpenAI, Anthropic, or DeepSeek) + +### Installation + +1. 
**Clone the repository** +```bash +git clone https://eniasgit.zeabur.app/demo/youtube-summarizer.git +cd youtube-summarizer +``` + +2. **Set up virtual environment** +```bash +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +``` + +3. **Install dependencies** +```bash +pip install -r requirements.txt +``` + +4. **Configure environment** +```bash +cp .env.example .env +# Edit .env with your API keys and configuration +``` + +5. **Initialize database** +```bash +alembic init alembic +alembic revision --autogenerate -m "Initial migration" +alembic upgrade head +``` + +6. **Run the application** +```bash +python src/main.py +``` + +The application will be available at `http://localhost:8082` + +## 📁 Project Structure + +``` +youtube-summarizer/ +├── src/ +│ ├── api/ # API endpoints +│ │ ├── routes.py # Main API routes +│ │ └── websocket.py # Real-time updates +│ ├── services/ # Business logic +│ │ ├── youtube.py # YouTube integration +│ │ ├── summarizer.py # AI summarization +│ │ └── cache.py # Caching service +│ ├── utils/ # Utility functions +│ │ ├── validators.py # Input validation +│ │ └── formatters.py # Output formatting +│ └── main.py # Application entry point +├── tests/ # Test suite +├── docs/ # Documentation +├── alembic/ # Database migrations +├── static/ # Frontend assets +├── templates/ # HTML templates +├── requirements.txt # Python dependencies +├── .env.example # Environment template +└── README.md # This file +``` + +## 🔧 Configuration + +### Essential Environment Variables + +| Variable | Description | Required | +|----------|-------------|----------| +| `YOUTUBE_API_KEY` | YouTube Data API v3 key | Optional* | +| `OPENAI_API_KEY` | OpenAI API key | One of these | +| `ANTHROPIC_API_KEY` | Anthropic Claude API key | is required | +| `DEEPSEEK_API_KEY` | 
DeepSeek API key | for AI | +| `DATABASE_URL` | Database connection string | Yes | +| `SECRET_KEY` | Session secret key | Yes | + +*YouTube API key improves metadata fetching but transcript extraction works without it. + +## 🧪 Testing + +Run the test suite: +```bash +pytest tests/ -v +pytest tests/ --cov=src --cov-report=html # With coverage +``` + +## 📝 API Documentation + +Once running, visit: +- Interactive API docs: `http://localhost:8082/docs` +- Alternative docs: `http://localhost:8082/redoc` + +### Key Endpoints + +- `POST /api/summarize` - Submit a YouTube URL for summarization +- `GET /api/summary/{id}` - Retrieve a summary +- `GET /api/summaries` - List all summaries +- `POST /api/export/{id}` - Export summary in different formats + +## 🚢 Deployment + +### Docker + +```bash +docker build -t youtube-summarizer . +docker run -p 8082:8082 --env-file .env youtube-summarizer +``` + +### Production Considerations + +1. Use PostgreSQL instead of SQLite for production +2. Configure proper CORS settings +3. Set up SSL/TLS certificates +4. Implement user authentication +5. Configure rate limiting per user +6. Set up monitoring and logging + +## 🤝 Contributing + +1. Fork the repository +2. Create a feature branch +3. Commit your changes +4. Push to the branch +5. Create a Pull Request + +## 📄 License + +This project is part of the Personal AI Assistant ecosystem. + +## 🔗 Related Projects + +- [Personal AI Assistant](https://eniasgit.zeabur.app/demo/my-ai-projects) +- [YouTube Automation Service](https://eniasgit.zeabur.app/demo/youtube-automation) +- [PDF Translator](https://eniasgit.zeabur.app/demo/pdf-translator) + +## 📞 Support + +For issues and questions, please create an issue in the repository. 
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6c1c199
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,44 @@
+# YouTube Summarizer Web Application Requirements
+
+# Web framework
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+python-multipart==0.0.6
+
+# YouTube integration
+youtube-transcript-api==0.6.1
+pytube==15.0.0
+yt-dlp==2024.8.6
+
+# AI and NLP
+openai==1.35.0
+anthropic==0.28.0
+langchain==0.2.5
+tiktoken==0.7.0
+
+# Database
+sqlalchemy==2.0.23
+alembic==1.13.0
+
+# HTTP client
+httpx==0.25.2
+requests==2.31.0
+
+# Environment and configuration
+python-dotenv==1.0.0
+pydantic==2.5.2
+pydantic-settings==2.1.0
+
+# Utilities
+python-dateutil==2.8.2
+pytz==2024.1
+
+# Testing
+pytest==7.4.3
+pytest-asyncio==0.21.1
+pytest-cov==4.1.0
+
+# Development
+black==23.12.0
+ruff==0.1.8
+mypy==1.7.1
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..423b00a
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+"""
+YouTube Summarizer Web Application
+Main entry point for the FastAPI application
+"""
+
+import os
+import sys
+from pathlib import Path
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from dotenv import load_dotenv
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+# Load environment variables
+load_dotenv()
+
+# Import routers (to be created)
+# from api.routes import api_router
+# from api.websocket import websocket_router
+
+
+def _cors_origins() -> list[str]:
+    """
+    Return the origins allowed to make cross-origin requests.
+
+    Reads the comma-separated CORS_ALLOWED_ORIGINS environment variable,
+    ignoring empty entries, and falls back to ["*"] when none are given.
+    """
+    raw = os.getenv("CORS_ALLOWED_ORIGINS", "")
+    origins = [item.strip() for item in raw.split(",") if item.strip()]
+    return origins or ["*"]
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """
+    Manage application lifecycle
+
+    Everything before the yield runs at startup and everything after it
+    runs at shutdown.
+    """
+    # Startup
+    print("🚀 Starting YouTube Summarizer Application...")
+
+    # Initialize services here
+    # await initialize_database()
+    # await initialize_cache()
+
+    yield
+
+    # Shutdown
+    print("🛑 Shutting down YouTube Summarizer Application...")
+    # await cleanup_resources()
+
+
+# Create FastAPI application
+app = FastAPI(
+    title="YouTube Summarizer",
+    description="AI-powered YouTube video summarization service",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+
+# Configure CORS
+# A wildcard origin cannot be combined with credentials (browsers reject
+# such responses), so credentials are only enabled for explicit origins.
+cors_origins = _cors_origins()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=cors_origins,
+    allow_credentials=cors_origins != ["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Mount static files (if needed)
+# app.mount("/static", StaticFiles(directory="static"), name="static")
+
+# Include routers
+# app.include_router(api_router, prefix="/api")
+# app.include_router(websocket_router)
+
+
+@app.get("/")
+async def root():
+    """
+    Root endpoint
+
+    Returns the service name, version, and the paths of the docs and
+    health endpoints.
+    """
+    return {
+        "message": "YouTube Summarizer API",
+        "version": "1.0.0",
+        "docs": "/docs",
+        "health": "/health",
+    }
+
+
+@app.get("/health")
+async def health_check():
+    """
+    Health check endpoint
+
+    Returns a static payload reporting the service as healthy.
+    """
+    return {
+        "status": "healthy",
+        "service": "youtube-summarizer",
+    }
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    host = os.getenv("APP_HOST", "0.0.0.0")
+    # os.getenv returns a string, so the fallback is a string as well
+    port = int(os.getenv("APP_PORT", "8082"))
+    debug = os.getenv("DEBUG", "False").lower() == "true"
+
+    print(f"🎬 YouTube Summarizer starting on http://{host}:{port}")
+
+    uvicorn.run(
+        "main:app",
+        host=host,
+        port=port,
+        reload=debug,
+        log_level="debug" if debug else "info",
+    )
\ No newline at end of file