---
description: Multi-layer caching strategy for optimized performance
globs: **/cache*.py, **/services/*.py, **/repositories/*.py, **/models/*.py
alwaysApply: false
---

# Caching Strategy Rule

## Core Principles

- **Multi-Layer Approach**: Different TTLs for different data types
- **Prioritize Expensive Operations**: Cache the most resource-intensive operations
- **Storage Hierarchy**: Use appropriate storage for different cache types
- **Performance Monitoring**: Track cache hit rates and effectiveness
- **Cost-Driven Decisions**: Cache expensive operations first
- **Invalidation Over Staleness**: Better to miss than serve stale data

## Implementation Patterns

### TTL Configuration

```python
# ✅ DO: Use appropriate TTLs for different data types
class CacheTTL:
    """Time-to-live constants for different cache types."""

    # Long-lived data
    TRANSCRIPTS = 30 * 24 * 60 * 60  # 30 days in seconds

    # Medium-lived data
    AI_ENHANCEMENTS = 7 * 24 * 60 * 60  # 7 days in seconds
    AUDIO_PREPROCESSING = 7 * 24 * 60 * 60  # 7 days in seconds

    # Short-lived data
    SEARCH_RESULTS = 24 * 60 * 60  # 1 day in seconds
    USER_PREFERENCES = 12 * 60 * 60  # 12 hours in seconds
```

### Storage Selection

```python
# ✅ DO: Use appropriate storage for different cache types
class CacheStorage:
    def __init__(self):
        self.redis = RedisClient()  # Fast, in-memory cache
        self.db = DatabaseClient()  # Persistent storage
        self.fs = FileSystemCache()  # Large file storage

    async def get_transcript(self, key: str) -> Optional[Dict[str, Any]]:
        """Get transcript from cache, trying Redis first, then DB."""
        # Try Redis first (fast)
        result = await self.redis.get(f"transcript:{key}")
        if result:
            return result

        # Fall back to database (persistent)
        result = await self.db.get_transcript(key)
        if result:
            # Backfill Redis cache for next time
            await self.redis.set(f"transcript:{key}", result, ex=CacheTTL.TRANSCRIPTS)
            return result

        return None

    async def get_audio_preprocessing(self, key: str) -> Optional[Path]:
        """Get preprocessed audio from filesystem cache."""
        # Large files stored on filesystem
        return await self.fs.get(f"audio:{key}")
```

### Cache Monitoring

```python
# ✅ DO: Track cache performance metrics
class CacheMetrics:
    def __init__(self):
        self.hits = 0
        self.misses = 0
        self.total = 0

    def record_hit(self):
        self.hits += 1
        self.total += 1

    def record_miss(self):
        self.misses += 1
        self.total += 1

    @property
    def hit_rate(self) -> float:
        """Calculate cache hit rate."""
        if self.total == 0:
            return 0.0
        return self.hits / self.total
```

### Compression

```python
# ✅ DO: Compress cached data
def cache_with_compression(data: Dict[str, Any]) -> bytes:
    """Compress data before caching."""
    json_data = json.dumps(data).encode('utf-8')
    return lz4.frame.compress(json_data)


def decompress_cached_data(compressed_data: bytes) -> Dict[str, Any]:
    """Decompress data from cache."""
    json_data = lz4.frame.decompress(compressed_data)
    return json.loads(json_data.decode('utf-8'))
```

### Anti-Patterns

```python
# ❌ DON'T: Use same TTL for all data types
# Setting everything to the same TTL is inefficient
cache.set("transcript", data, ex=86400)  # Wrong! Should be 30 days
cache.set("ai_enhancement", data, ex=86400)  # Wrong! Should be 7 days

# ❌ DON'T: Store large files in Redis
# This will consume too much memory
redis.set(f"audio:{key}", large_binary_data)  # Wrong! Use filesystem
```