"""
Flask web server - RSS-Bridge-style URL template system
"""
from flask import Flask, request, Response, jsonify, g
from typing import Optional
import sys
import os
import yaml
import time
import logging
import random
from pathlib import Path

# Create the module logger
logger = logging.getLogger(__name__)

sys.path.insert(0, str(Path(__file__).parent.parent))

from src.database import Database
from src.video_fetcher import fetch_videos_from_rss_bridge, get_channel_id_from_handle, extract_video_id
from src.transcript_extractor import TranscriptExtractor
from src.transcript_cleaner import TranscriptCleaner
from src.rss_generator import RSSGenerator
from src.security import (
    init_security, get_security_manager,
    require_api_key, rate_limit, validate_input
)

app = Flask(__name__)

# Load the security config lazily
_security_config = None


def load_security_config():
    """Load the security config from config/security.yaml"""
    global _security_config
    if _security_config is None:
        config_path = Path(__file__).parent.parent / 'config' / 'security.yaml'
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                _security_config = yaml.safe_load(f).get('security', {})
        else:
            _security_config = {}
    return _security_config
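
# For reference, a minimal sketch of the config/security.yaml layout this loader
# expects, inferred from the keys read in this module; the concrete values below
# are illustrative assumptions, not the real configuration (in particular the
# shape of each api_keys entry is decided by src.security, not here):
#
#   security:
#     api_keys:
#       example-key-123: "example client"      # assumed key/label pairs
#     default_rate_limit: 60                    # requests per minute
#     security_headers:
#       X-Content-Type-Options: nosniff
#     cors:
#       enabled: true
#       allowed_origins: ["*"]
#       allowed_methods: ["GET", "OPTIONS"]
#       allowed_headers: ["Content-Type", "X-API-Key"]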


# Initialize the security manager
def init_app_security():
    """Initialize the security manager at application startup"""
    config = load_security_config()
    api_keys = config.get('api_keys', {})
    default_rate_limit = config.get('default_rate_limit', 60)
    init_security(api_keys, default_rate_limit)


# Security headers and CORS middleware
@app.after_request
def add_security_headers(response):
    """Add security headers to every response"""
    config = load_security_config()
    headers = config.get('security_headers', {})

    for header, value in headers.items():
        response.headers[header] = value

    # CORS headers
    cors_config = config.get('cors', {})
    if cors_config.get('enabled', True):
        origins = cors_config.get('allowed_origins', ['*'])
        if '*' in origins:
            response.headers['Access-Control-Allow-Origin'] = '*'
        else:
            origin = request.headers.get('Origin')
            if origin in origins:
                response.headers['Access-Control-Allow-Origin'] = origin

        response.headers['Access-Control-Allow-Methods'] = ', '.join(
            cors_config.get('allowed_methods', ['GET', 'OPTIONS'])
        )
        response.headers['Access-Control-Allow-Headers'] = ', '.join(
            cors_config.get('allowed_headers', ['Content-Type', 'X-API-Key'])
        )

    # Expose the remaining rate limit in a header
    if hasattr(g, 'rate_limit_remaining'):
        response.headers['X-RateLimit-Remaining'] = str(g.rate_limit_remaining)

    return response


# OPTIONS handler for CORS
@app.route('/', methods=['OPTIONS'])
@app.route('/<path:path>', methods=['OPTIONS'])
def handle_options(path=None):
    """CORS preflight request handler"""
    return Response(status=200)


# Initialize security at application startup
init_app_security()


# Global instances (lazy loading)
db = None
extractor = None
cleaner = None


def get_db():
    """Get the Database instance (singleton)"""
    global db
    if db is None:
        db = Database()
        db.init_database()
    return db


def get_extractor():
    """Get the transcript extractor instance"""
    global extractor
    if extractor is None:
        extractor = TranscriptExtractor()
    return extractor


def get_cleaner():
    """Get the transcript cleaner instance"""
    global cleaner
    if cleaner is None:
        cleaner = TranscriptCleaner()
    return cleaner


def normalize_channel_id(channel_id: Optional[str] = None,
                         channel: Optional[str] = None,
                         channel_url: Optional[str] = None) -> Optional[str]:
    """
    Normalize a channel ID given in any of several formats.

    Args:
        channel_id: Channel ID given directly (UC...)
        channel: Channel handle (@username) or bare username
        channel_url: Full YouTube channel URL

    Returns:
        Normalized Channel ID, or None
    """
    # Channel ID given directly
    if channel_id:
        if channel_id.startswith('UC') and len(channel_id) == 24:
            return channel_id
        # If it is actually a URL, parse the ID out of it
        if 'youtube.com/channel/' in channel_id:
            parts = channel_id.split('/channel/')
            if len(parts) > 1:
                return parts[-1].split('?')[0].split('/')[0]

    # Channel handle (@username)
    if channel:
        if not channel.startswith('@'):
            channel = f"@{channel}"
        handle_url = f"https://www.youtube.com/{channel}"
        return get_channel_id_from_handle(handle_url)

    # Channel URL
    if channel_url:
        # Handle URL
        if '/@' in channel_url:
            return get_channel_id_from_handle(channel_url)
        # Channel ID URL
        elif '/channel/' in channel_url:
            parts = channel_url.split('/channel/')
            if len(parts) > 1:
                return parts[-1].split('?')[0].split('/')[0]

    return None
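
# Illustrative call shapes, reusing the example channel values from the / route
# docstring further down (not live lookups):
#   normalize_channel_id(channel_id='UC9h8BDcXwkhZtnqoQJ7PggA')   # 24-char UC... ID, returned as-is
#   normalize_channel_id(channel='@tavakfi')                      # resolved via get_channel_id_from_handle()
#   normalize_channel_id(channel_url='https://www.youtube.com/@tavakfi')  # handle URL, resolved the same way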


def process_channel(channel_id: str, max_items: int = 50) -> dict:
    """
    Build the transcript feed data for a channel.

    Returns:
        Processed video list and metadata used to build the RSS feed
    """
    db = get_db()
    extractor = get_extractor()
    cleaner = get_cleaner()

    # Fetch the video list from RSS-Bridge (fetch twice max_items so there are enough videos;
    # we over-fetch because some videos may have no transcript)
    rss_bridge_limit = max(max_items * 2, 50)  # Fetch at least 50 videos
    logger.info(f"[PROCESS] Fetching video list from RSS-Bridge for channel {channel_id} (limit: {rss_bridge_limit})")

    try:
        videos = fetch_videos_from_rss_bridge(
            base_url="https://rss-bridge.org/bridge01",
            channel_id=channel_id,
            format="Atom",
            max_items=rss_bridge_limit
        )
        logger.info(f"[PROCESS] Received {len(videos)} videos from RSS-Bridge")
    except Exception as e:
        logger.error(f"[PROCESS] ❌ RSS-Bridge error: {type(e).__name__} - {str(e)}")
        raise Exception(f"RSS-Bridge error: {e}")

    # Add new videos to the database
    new_videos_count = 0
    for video in videos:
        video['channel_id'] = channel_id
        if not db.is_video_processed(video['video_id']):
            db.add_video(video)
            new_videos_count += 1

    if new_videos_count > 0:
        logger.info(f"[PROCESS] Added {new_videos_count} new videos to the database")
    else:
        logger.debug("[PROCESS] All videos are already in the database")

    # Process pending videos (up to max_items, in small batches).
    # Only 5 videos are processed per batch to reduce the risk of YouTube IP blocking.
    # max_items: how many transcripts to process per request (maximum 100)
    batch_size = 5  # Videos processed per batch (small batches lower the blocking risk)
    processed_count = 0  # Number of transcripts processed

    # Get all pending videos, filtered by channel_id
    all_pending_videos = [v for v in db.get_pending_videos() if v['channel_id'] == channel_id]
    logger.info(f"[PROCESS] Found {len(all_pending_videos)} pending videos for channel {channel_id} (max_items: {max_items})")

    # Process batch by batch until max_items transcripts have been handled
    total_batches = (len(all_pending_videos) + batch_size - 1) // batch_size
    current_batch = 0

    for batch_start in range(0, len(all_pending_videos), batch_size):
        if processed_count >= max_items:
            logger.info(f"[PROCESS] Maximum transcript count reached ({processed_count}/{max_items})")
            break

        current_batch += 1
        batch_videos = all_pending_videos[batch_start:batch_start + batch_size]
        logger.info(f"[BATCH] Starting batch {current_batch}/{total_batches} ({len(batch_videos)} videos, total processed: {processed_count}/{max_items})")

        batch_processed = 0
        batch_cached = 0
        batch_failed = 0

        for video in batch_videos:
            if processed_count >= max_items:
                break

            video_id = video['video_id']
            video_title = video.get('video_title', 'N/A')[:50]

            # Cache check: skip if this transcript was processed within the last 3 days
            if db.is_transcript_cached(video_id, cache_days=3):
                logger.debug(f"[CACHE] Transcript for video {video_id} ({video_title}) is cached, skipping")
                batch_cached += 1
                continue

            try:
                logger.info(f"[VIDEO] Processing video: {video_id} - {video_title}")

                # Extract the transcript
                transcript = extractor.fetch_transcript(
                    video_id,
                    languages=['tr', 'en']
                )

                if transcript:
                    # Clean the transcript
                    logger.debug(f"[VIDEO] Cleaning transcript for video {video_id}...")
                    raw, clean = cleaner.clean_transcript(transcript, sentences_per_paragraph=3)

                    # Save to the database (each batch is persisted immediately)
                    db.update_video_transcript(
                        video_id,
                        raw,
                        clean,
                        status=1,
                        language='tr'
                    )
                    processed_count += 1
                    batch_processed += 1
                    logger.info(f"[VIDEO] ✅ Video {video_id} processed and saved ({processed_count}/{max_items})")
                else:
                    logger.warning(f"[VIDEO] ⚠️ Could not get a transcript for video {video_id} (returned None)")
                    batch_failed += 1
                    db.mark_video_failed(video_id, "Transcript returned None")
            except Exception as e:
                error_type = type(e).__name__
                error_msg = str(e)[:200]
                logger.error(f"[VIDEO] ❌ Error processing video {video_id}: {error_type} - {error_msg}")
                db.mark_video_failed(video_id, str(e))
                batch_failed += 1

        # Batch summary
        logger.info(f"[BATCH] Batch {current_batch}/{total_batches} finished - Processed: {batch_processed}, Cached: {batch_cached}, Failed: {batch_failed}")

        # Batch done; wait a long time to avoid YouTube IP blocking
        if processed_count < max_items and batch_start + batch_size < len(all_pending_videos):
            # If blocking occurs, wait longer
            wait_time = 60 + random.uniform(0, 30)  # 60-90 seconds, randomized (human-like)
            logger.info(f"[BATCH] Waiting between batches: {wait_time:.1f} seconds ({wait_time/60:.1f} minutes) - YouTube IP blocking prevention")
            time.sleep(wait_time)

    # Fetch the processed videos
    processed_videos = db.get_processed_videos(
        limit=max_items,
        channel_id=channel_id
    )

    logger.info(f"[PROCESS] ✅ Channel {channel_id} processing complete - returning {len(processed_videos)} processed videos")

    return {
        'videos': processed_videos,
        'channel_id': channel_id,
        'count': len(processed_videos)
    }
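
# Rough latency sketch under the constants above (batch_size = 5, a 60-90 s sleep
# between batches, ignoring per-video fetch time, which varies): the default
# max_items = 10 means at most 2 batches and 1 inter-batch wait, i.e. roughly
# 1-1.5 minutes of sleep alone, while max_items = 100 can require up to 20 batches
# and 19 waits, i.e. roughly 19-28 minutes of sleep before the feed is returned.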


@app.route('/', methods=['GET'])
@require_api_key  # API key required
@validate_input  # Input validation
def generate_feed():
    """
    RSS-Bridge-style URL template.

    Examples:
    - /?channel_id=UC9h8BDcXwkhZtnqoQJ7PggA&format=Atom
    - /?channel=@tavakfi&format=Atom
    - /?channel_url=https://www.youtube.com/@tavakfi&format=Atom
    """
    # Read the query parameters (the validate_input decorator has already sanitized them)
    channel_id = request.args.get('channel_id')
    channel = request.args.get('channel')  # @username or username
    channel_url = request.args.get('channel_url')
    format_type = request.args.get('format', 'Atom').lower()  # Atom or Rss
    try:
        max_items = int(request.args.get('max_items', 10))  # Default: 10 transcripts
        # At most 100 transcripts (processed in batches of 5, see process_channel)
        max_items = min(max_items, 100)
    except (ValueError, TypeError):
        max_items = 10

    # Normalize the channel ID
    normalized_channel_id = normalize_channel_id(
        channel_id=channel_id,
        channel=channel,
        channel_url=channel_url
    )

    if not normalized_channel_id:
        return jsonify({
            'error': 'Channel ID not found',
            'usage': {
                'channel_id': 'UC... (YouTube Channel ID)',
                'channel': '@username or username',
                'channel_url': 'https://www.youtube.com/@username or https://www.youtube.com/channel/UC...',
                'format': 'Atom or Rss (default: Atom)',
                'max_items': 'Maximum number of transcripts (default: 10, maximum: 100, processed in batches of 5)'
            }
        }), 400

    try:
        # Process the channel
        result = process_channel(normalized_channel_id, max_items=max_items)

        if not result['videos']:
            return jsonify({
                'error': 'No processed videos yet',
                'channel_id': normalized_channel_id,
                'message': 'Transcripts are being processed in the background. Please try again in a few minutes.',
                'note': 'Transcript extraction can be slow because of YouTube IP blocking. Allow a few minutes after the first request.'
            }), 404

        # Build the RSS feed
        channel_info = {
            'id': normalized_channel_id,
            'title': f"YouTube Transcript Feed - {normalized_channel_id}",
            'link': f"https://www.youtube.com/channel/{normalized_channel_id}",
            'description': f'Full-text transcript RSS feed for channel {normalized_channel_id}',
            'language': 'en'
        }

        generator = RSSGenerator(channel_info)

        for video in result['videos']:
            generator.add_video_entry(video)

        # Return in the requested format
        response_headers = {}
        if hasattr(g, 'rate_limit_remaining'):
            response_headers['X-RateLimit-Remaining'] = str(g.rate_limit_remaining)

        if format_type == 'rss':
            rss_content = generator.generate_rss_string()
            response_headers['Content-Type'] = 'application/rss+xml; charset=utf-8'
            return Response(
                rss_content,
                mimetype='application/rss+xml',
                headers=response_headers
            )
        else:  # Atom
            # Atom support via feedgen
            atom_content = generator.generate_atom_string()
            response_headers['Content-Type'] = 'application/atom+xml; charset=utf-8'
            return Response(
                atom_content,
                mimetype='application/atom+xml',
                headers=response_headers
            )

    except Exception as e:
        return jsonify({
            'error': str(e),
            'channel_id': normalized_channel_id
        }), 500
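
# A minimal client sketch. How the API key is transmitted is decided by the
# require_api_key decorator in src.security (not shown in this module); an
# X-API-Key request header is assumed below only because the CORS config
# whitelists that header, and the key value and host are placeholders.
#
#   import urllib.request
#   req = urllib.request.Request(
#       'http://localhost:5000/?channel=@tavakfi&format=Atom&max_items=10',
#       headers={'X-API-Key': 'your-api-key-here'}
#   )
#   with urllib.request.urlopen(req) as resp:
#       atom_xml = resp.read().decode('utf-8')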


@app.route('/health', methods=['GET'])
@rate_limit(limit_per_minute=120)  # Higher limit for the health check
def health():
    """Health check endpoint"""
    return jsonify({'status': 'ok', 'service': 'YouTube Transcript RSS Feed'})


@app.route('/info', methods=['GET'])
@require_api_key  # API key required
def info():
    """API information"""
    return jsonify({
        'service': 'YouTube Transcript RSS Feed Generator',
        'version': '1.0.0',
        'endpoints': {
            '/': 'RSS Feed Generator',
            '/health': 'Health Check',
            '/info': 'API Info'
        },
        'usage': {
            'channel_id': 'UC... (YouTube Channel ID)',
            'channel': '@username or username',
            'channel_url': 'Full YouTube channel URL',
            'format': 'Atom or Rss (default: Atom)',
            'max_items': 'Maximum number of transcripts processed per request (default: 10, maximum: 100, processed in batches of 5)'
        },
        'examples': [
            '/?channel_id=UC9h8BDcXwkhZtnqoQJ7PggA&format=Atom',
            '/?channel=@tavakfi&format=Rss',
            '/?channel_url=https://www.youtube.com/@tavakfi&format=Atom&max_items=50'
        ]
    })


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)