Initial commit: YouTube Summarizer project setup
- Created project structure following new standardized layout - Added FastAPI-based main application - Configured requirements with YouTube and AI integrations - Added comprehensive README documentation - Set up environment configuration template
This commit is contained in:
commit
1bd82158fc
|
|
@ -0,0 +1,40 @@
|
||||||
|
# YouTube Summarizer Configuration
|
||||||
|
|
||||||
|
# Server Configuration
|
||||||
|
APP_HOST=0.0.0.0
|
||||||
|
APP_PORT=8082
|
||||||
|
DEBUG=False
|
||||||
|
|
||||||
|
# YouTube API (optional but recommended for metadata)
|
||||||
|
YOUTUBE_API_KEY=your_youtube_api_key_here
|
||||||
|
|
||||||
|
# AI Service Configuration (choose one or multiple)
|
||||||
|
# OpenAI
|
||||||
|
OPENAI_API_KEY=your_openai_api_key_here
|
||||||
|
OPENAI_MODEL=gpt-4o-mini
|
||||||
|
|
||||||
|
# Anthropic Claude
|
||||||
|
ANTHROPIC_API_KEY=your_anthropic_api_key_here
|
||||||
|
ANTHROPIC_MODEL=claude-3-haiku-20240307
|
||||||
|
|
||||||
|
# DeepSeek (cost-effective option)
|
||||||
|
DEEPSEEK_API_KEY=your_deepseek_api_key_here
|
||||||
|
DEEPSEEK_MODEL=deepseek-chat
|
||||||
|
|
||||||
|
# Database
|
||||||
|
DATABASE_URL=sqlite:///./data/youtube_summarizer.db
|
||||||
|
|
||||||
|
# Session Configuration
|
||||||
|
SECRET_KEY=your-secret-key-here-change-in-production
|
||||||
|
|
||||||
|
# Rate Limiting
|
||||||
|
RATE_LIMIT_PER_MINUTE=30
|
||||||
|
MAX_VIDEO_LENGTH_MINUTES=180
|
||||||
|
|
||||||
|
# Cache Configuration
|
||||||
|
ENABLE_CACHE=True
|
||||||
|
CACHE_TTL_HOURS=24
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
LOG_FILE=logs/app.log
|
||||||
|
|
@ -0,0 +1,53 @@
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
.venv
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.*.local
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
.coverage
|
||||||
|
htmlcov/
|
||||||
|
.pytest_cache/
|
||||||
|
.tox/
|
||||||
|
|
||||||
|
# Build
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
*.egg-info/
|
||||||
|
.eggs/
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
logs/
|
||||||
|
|
||||||
|
# Database
|
||||||
|
*.db
|
||||||
|
*.sqlite
|
||||||
|
*.sqlite3
|
||||||
|
|
||||||
|
# Media/Data
|
||||||
|
data/
|
||||||
|
downloads/
|
||||||
|
cache/
|
||||||
|
|
||||||
|
# Task Master
|
||||||
|
.taskmaster/reports/
|
||||||
|
.taskmaster/backups/
|
||||||
|
|
@ -0,0 +1,173 @@
|
||||||
|
# YouTube Summarizer Web Application
|
||||||
|
|
||||||
|
An AI-powered web application that automatically extracts, transcribes, and summarizes YouTube videos, providing intelligent insights and key takeaways.
|
||||||
|
|
||||||
|
## 🎯 Features
|
||||||
|
|
||||||
|
- **Video Transcript Extraction**: Automatically fetch transcripts from YouTube videos
|
||||||
|
- **AI-Powered Summarization**: Generate concise summaries using multiple AI models
|
||||||
|
- **Multi-Model Support**: Choose between OpenAI GPT, Anthropic Claude, or DeepSeek
|
||||||
|
- **Key Points Extraction**: Identify and highlight main topics and insights
|
||||||
|
- **Chapter Generation**: Automatically create timestamped chapters
|
||||||
|
- **Export Options**: Save summaries as Markdown, PDF, or plain text
|
||||||
|
- **Caching System**: Reduce API calls with intelligent caching
|
||||||
|
- **Rate Limiting**: Built-in protection against API overuse
|
||||||
|
|
||||||
|
## 🏗️ Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
[Web Interface] → [FastAPI Backend] → [YouTube API/Transcript API]
|
||||||
|
↓
|
||||||
|
[AI Service] ← [Summary Generation] ← [Transcript Processing]
|
||||||
|
↓
|
||||||
|
[Database Cache] → [Summary Storage] → [Export Service]
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🚀 Quick Start
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- Python 3.11+
|
||||||
|
- YouTube API Key (optional but recommended)
|
||||||
|
- At least one AI service API key (OpenAI, Anthropic, or DeepSeek)
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
1. **Clone the repository**
|
||||||
|
```bash
|
||||||
|
git clone https://eniasgit.zeabur.app/demo/youtube-summarizer.git
|
||||||
|
cd youtube-summarizer
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Set up virtual environment**
|
||||||
|
```bash
|
||||||
|
python3 -m venv venv
|
||||||
|
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Install dependencies**
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Configure environment**
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
# Edit .env with your API keys and configuration
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Initialize database**
|
||||||
|
```bash
|
||||||
|
alembic init alembic
|
||||||
|
alembic revision --autogenerate -m "Initial migration"
|
||||||
|
alembic upgrade head
|
||||||
|
```
|
||||||
|
|
||||||
|
6. **Run the application**
|
||||||
|
```bash
|
||||||
|
python src/main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
The application will be available at `http://localhost:8082`
|
||||||
|
|
||||||
|
## 📁 Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
youtube-summarizer/
|
||||||
|
├── src/
|
||||||
|
│ ├── api/ # API endpoints
|
||||||
|
│ │ ├── routes.py # Main API routes
|
||||||
|
│ │ └── websocket.py # Real-time updates
|
||||||
|
│ ├── services/ # Business logic
|
||||||
|
│ │ ├── youtube.py # YouTube integration
|
||||||
|
│ │ ├── summarizer.py # AI summarization
|
||||||
|
│ │ └── cache.py # Caching service
|
||||||
|
│ ├── utils/ # Utility functions
|
||||||
|
│ │ ├── validators.py # Input validation
|
||||||
|
│ │ └── formatters.py # Output formatting
|
||||||
|
│ └── main.py # Application entry point
|
||||||
|
├── tests/ # Test suite
|
||||||
|
├── docs/ # Documentation
|
||||||
|
├── alembic/ # Database migrations
|
||||||
|
├── static/ # Frontend assets
|
||||||
|
├── templates/ # HTML templates
|
||||||
|
├── requirements.txt # Python dependencies
|
||||||
|
├── .env.example # Environment template
|
||||||
|
└── README.md # This file
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔧 Configuration
|
||||||
|
|
||||||
|
### Essential Environment Variables
|
||||||
|
|
||||||
|
| Variable | Description | Required |
|
||||||
|
|----------|-------------|----------|
|
||||||
|
| `YOUTUBE_API_KEY` | YouTube Data API v3 key | Optional* |
|
||||||
|
| `OPENAI_API_KEY` | OpenAI API key | At least one AI key |
|
||||||
|
| `ANTHROPIC_API_KEY` | Anthropic Claude API key | At least one AI key |
|
||||||
|
| `DEEPSEEK_API_KEY` | DeepSeek API key | At least one AI key |
|
||||||
|
| `DATABASE_URL` | Database connection string | Yes |
|
||||||
|
| `SECRET_KEY` | Session secret key | Yes |
|
||||||
|
|
||||||
|
*A YouTube API key improves metadata fetching, but transcript extraction works without it.
|
||||||
|
|
||||||
|
## 🧪 Testing
|
||||||
|
|
||||||
|
Run the test suite:
|
||||||
|
```bash
|
||||||
|
pytest tests/ -v
|
||||||
|
pytest tests/ --cov=src --cov-report=html # With coverage
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📝 API Documentation
|
||||||
|
|
||||||
|
Once running, visit:
|
||||||
|
- Interactive API docs: `http://localhost:8082/docs`
|
||||||
|
- Alternative docs: `http://localhost:8082/redoc`
|
||||||
|
|
||||||
|
### Key Endpoints
|
||||||
|
|
||||||
|
- `POST /api/summarize` - Submit a YouTube URL for summarization
|
||||||
|
- `GET /api/summary/{id}` - Retrieve a summary
|
||||||
|
- `GET /api/summaries` - List all summaries
|
||||||
|
- `POST /api/export/{id}` - Export summary in different formats
|
||||||
|
|
||||||
|
## 🚢 Deployment
|
||||||
|
|
||||||
|
### Docker
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t youtube-summarizer .
|
||||||
|
docker run -p 8082:8082 --env-file .env youtube-summarizer
|
||||||
|
```
|
||||||
|
|
||||||
|
### Production Considerations
|
||||||
|
|
||||||
|
1. Use PostgreSQL instead of SQLite for production
|
||||||
|
2. Configure proper CORS settings
|
||||||
|
3. Set up SSL/TLS certificates
|
||||||
|
4. Implement user authentication
|
||||||
|
5. Configure rate limiting per user
|
||||||
|
6. Set up monitoring and logging
|
||||||
|
|
||||||
|
## 🤝 Contributing
|
||||||
|
|
||||||
|
1. Fork the repository
|
||||||
|
2. Create a feature branch
|
||||||
|
3. Commit your changes
|
||||||
|
4. Push to the branch
|
||||||
|
5. Create a Pull Request
|
||||||
|
|
||||||
|
## 📄 License
|
||||||
|
|
||||||
|
This project is part of the Personal AI Assistant ecosystem.
|
||||||
|
|
||||||
|
## 🔗 Related Projects
|
||||||
|
|
||||||
|
- [Personal AI Assistant](https://eniasgit.zeabur.app/demo/my-ai-projects)
|
||||||
|
- [YouTube Automation Service](https://eniasgit.zeabur.app/demo/youtube-automation)
|
||||||
|
- [PDF Translator](https://eniasgit.zeabur.app/demo/pdf-translator)
|
||||||
|
|
||||||
|
## 📞 Support
|
||||||
|
|
||||||
|
For issues and questions, please create an issue in the repository.
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
# YouTube Summarizer Web Application Requirements
|
||||||
|
|
||||||
|
# Web framework
|
||||||
|
fastapi==0.104.1
|
||||||
|
uvicorn[standard]==0.24.0
|
||||||
|
python-multipart==0.0.6
|
||||||
|
|
||||||
|
# YouTube integration
|
||||||
|
youtube-transcript-api==0.6.1
|
||||||
|
pytube==15.0.0
|
||||||
|
yt-dlp==2024.8.6
|
||||||
|
|
||||||
|
# AI and NLP
|
||||||
|
openai==1.35.0
|
||||||
|
anthropic==0.28.0
|
||||||
|
langchain==0.2.5
|
||||||
|
tiktoken==0.7.0
|
||||||
|
|
||||||
|
# Database
|
||||||
|
sqlalchemy==2.0.23
|
||||||
|
alembic==1.13.0
|
||||||
|
|
||||||
|
# HTTP client
|
||||||
|
httpx==0.25.2
|
||||||
|
requests==2.31.0
|
||||||
|
|
||||||
|
# Environment and configuration
|
||||||
|
python-dotenv==1.0.0
|
||||||
|
pydantic==2.5.2
|
||||||
|
pydantic-settings==2.1.0
|
||||||
|
|
||||||
|
# Utilities
|
||||||
|
python-dateutil==2.8.2
|
||||||
|
pytz==2024.1
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
pytest==7.4.3
|
||||||
|
pytest-asyncio==0.21.1
|
||||||
|
pytest-cov==4.1.0
|
||||||
|
|
||||||
|
# Development
|
||||||
|
black==23.12.0
|
||||||
|
ruff==0.1.8
|
||||||
|
mypy==1.7.1
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
YouTube Summarizer Web Application
|
||||||
|
Main entry point for the FastAPI application
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
|
from fastapi import FastAPI
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Make the project root (the parent of this file's directory) importable
_project_root = Path(__file__).parent.parent
sys.path.insert(0, str(_project_root))

# Load environment variables
load_dotenv()
|
||||||
|
|
||||||
|
# Import routers (to be created)
|
||||||
|
# from api.routes import api_router
|
||||||
|
# from api.websocket import websocket_router
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Wrap the application's lifetime with startup and shutdown steps.

    The code before ``yield`` runs when the context is entered and the code
    after it runs when the context exits. For now each phase only prints a
    banner; the service hooks are still placeholders.
    """
    startup_banner = "🚀 Starting YouTube Summarizer Application..."
    shutdown_banner = "🛑 Shutting down YouTube Summarizer Application..."

    # Startup
    print(startup_banner)

    # Initialize services here
    # await initialize_database()
    # await initialize_cache()

    yield

    # Shutdown
    print(shutdown_banner)
    # await cleanup_resources()
|
||||||
|
|
||||||
|
# Create FastAPI application
|
||||||
|
app = FastAPI(
    title="YouTube Summarizer",
    description="AI-powered YouTube video summarization service",
    version="1.0.0",
    lifespan=lifespan,
)

# Configure CORS.
#
# Allowed origins come from the CORS_ALLOW_ORIGINS environment variable as a
# comma-separated list (e.g. "https://app.example.com,https://admin.example.com").
# When it is unset or empty, every origin is allowed ("*").
#
# Credentials (cookies, Authorization headers) are only enabled when explicit
# origins are configured: combining a wildcard origin with credentials is
# rejected by the CORS specification and would otherwise let any website make
# authenticated cross-origin requests against this API.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||||
|
|
||||||
|
# Mount static files (if needed)
|
||||||
|
# app.mount("/static", StaticFiles(directory="static"), name="static")
|
||||||
|
|
||||||
|
# Include routers
|
||||||
|
# app.include_router(api_router, prefix="/api")
|
||||||
|
# app.include_router(websocket_router)
|
||||||
|
|
||||||
|
@app.get("/")
async def root():
    """Return the service name, version, and the paths of the docs and health endpoints."""
    service_info = {
        "message": "YouTube Summarizer API",
        "version": "1.0.0",
        "docs": "/docs",
        "health": "/health",
    }
    return service_info
|
||||||
|
|
||||||
|
@app.get("/health")
async def health_check():
    """Return a fixed "healthy" payload; no dependencies are probed."""
    health_payload = {
        "status": "healthy",
        "service": "youtube-summarizer",
    }
    return health_payload
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import uvicorn

    # Runtime settings are read from the environment, with built-in fallbacks.
    debug = os.getenv("DEBUG", "False").lower() == "true"
    host = os.getenv("APP_HOST", "0.0.0.0")
    port = int(os.getenv("APP_PORT", 8082))

    print(f"🎬 YouTube Summarizer starting on http://{host}:{port}")

    # The app is passed as an import string; DEBUG switches on both
    # auto-reload and debug-level logging.
    server_options = {
        "host": host,
        "port": port,
        "reload": debug,
        "log_level": "debug" if debug else "info",
    }
    uvicorn.run("main:app", **server_options)
|
||||||
Loading…
Reference in New Issue