# Youtube2Feed/src/web_server.py

"""
Flask web server - RSS-Bridge benzeri URL template sistemi
"""
from flask import Flask, request, Response, jsonify, g
from typing import Optional
from urllib.parse import unquote, urlparse
import sys
import os
import yaml
import time
import logging
import random
from pathlib import Path

# Logger oluştur
logger = logging.getLogger(__name__)

sys.path.insert(0, str(Path(__file__).parent.parent))

from src.database import Database
from src.video_fetcher import fetch_videos_from_rss_bridge, get_channel_id_from_handle, extract_video_id
from src.transcript_extractor import TranscriptExtractor
from src.transcript_cleaner import TranscriptCleaner
from src.rss_generator import RSSGenerator
from src.security import (
    init_security, get_security_manager,
    require_api_key, rate_limit, validate_input
)


# Flask application object; the routes and the after_request hook in this module are registered on it
app = Flask(__name__)

# Cached security configuration; filled in by the first load_security_config() call
_security_config = None


def load_security_config():
    """
    Load and cache the ``security`` section of ``config/security.yaml``.

    The file is looked up in the ``config`` directory two levels above this
    module. The parsed result is cached in the module-level
    ``_security_config`` so the file is read at most once per process.

    Returns:
        dict: The ``security`` mapping from the YAML file, or an empty dict
        when the file is missing, empty, or has no mapping under ``security``.
    """
    global _security_config
    if _security_config is None:
        config_path = Path(__file__).parent.parent / 'config' / 'security.yaml'
        section = None
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                # safe_load returns None for an empty document, so the result
                # must be type-checked before calling .get on it
                document = yaml.safe_load(f)
            if isinstance(document, dict):
                section = document.get('security')
        # A null or non-mapping "security" value would break every later
        # config.get(...) call; fall back to an empty mapping instead
        _security_config = section if isinstance(section, dict) else {}
    return _security_config

# Initialise the security manager
def init_app_security():
    """Initialise the security manager from the loaded security configuration."""
    security_settings = load_security_config()
    init_security(
        security_settings.get('api_keys', {}),
        security_settings.get('default_rate_limit', 60),
    )

# Security headers and CORS middleware
@app.after_request
def add_security_headers(response):
    """
    Attach security, CORS and rate-limit headers to every response.

    Headers listed under ``security_headers`` in the security config are
    copied onto the response. For feed responses (Atom/RSS/XML content types)
    the configured Content-Security-Policy is replaced by a permissive one so
    feed readers and browsers can render the feed.

    Args:
        response: The outgoing Flask response.

    Returns:
        The same response object with the extra headers set.
    """
    config = load_security_config()
    headers = config.get('security_headers') or {}

    # content_type can be None if the header was removed; treat it as empty
    # so the substring checks below cannot raise TypeError
    content_type = response.content_type or ''
    feed_mimetypes = (
        'application/atom+xml',
        'application/rss+xml',
        'application/xml',
        'text/xml',
    )
    is_feed_response = any(mimetype in content_type for mimetype in feed_mimetypes)

    for header, value in headers.items():
        if header == 'Content-Security-Policy' and is_feed_response:
            # Relaxed CSP for feeds
            response.headers[header] = "default-src 'self' 'unsafe-inline' data: blob: *"
        else:
            response.headers[header] = value

    # CORS headers
    cors_config = config.get('cors', {})
    if cors_config.get('enabled', True):
        origins = cors_config.get('allowed_origins', ['*'])
        if '*' in origins:
            response.headers['Access-Control-Allow-Origin'] = '*'
        else:
            # The response now depends on the request's Origin header, so
            # shared caches must key on it; otherwise a response cached for
            # one origin could be served to another
            response.vary.add('Origin')
            origin = request.headers.get('Origin')
            if origin in origins:
                response.headers['Access-Control-Allow-Origin'] = origin

        response.headers['Access-Control-Allow-Methods'] = ', '.join(
            cors_config.get('allowed_methods', ['GET', 'OPTIONS'])
        )
        response.headers['Access-Control-Allow-Headers'] = ', '.join(
            cors_config.get('allowed_headers', ['Content-Type', 'X-API-Key'])
        )

    # Expose the remaining rate-limit budget when g carries it
    if hasattr(g, 'rate_limit_remaining'):
        response.headers['X-RateLimit-Remaining'] = str(g.rate_limit_remaining)

    return response

# OPTIONS handler for CORS
@app.route('/', methods=['OPTIONS'])
@app.route('/<path:path>', methods=['OPTIONS'])
def handle_options(path=None):
    """
    Answer CORS preflight (OPTIONS) requests for any path.

    Args:
        path: Captured URL path; unused, present only so the catch-all
            route can bind it.

    Returns:
        An empty 200 response carrying the CORS headers from the security
        config (when CORS is enabled).
    """
    cors_settings = load_security_config().get('cors', {})
    preflight = Response(status=200)

    # Nothing to add when CORS is switched off
    if not cors_settings.get('enabled', True):
        return preflight

    allowed_origins = cors_settings.get('allowed_origins', ['*'])
    if '*' in allowed_origins:
        preflight.headers['Access-Control-Allow-Origin'] = '*'
    else:
        requesting_origin = request.headers.get('Origin')
        if requesting_origin in allowed_origins:
            preflight.headers['Access-Control-Allow-Origin'] = requesting_origin

    allowed_methods = cors_settings.get('allowed_methods', ['GET', 'OPTIONS'])
    allowed_headers = cors_settings.get('allowed_headers', ['Content-Type', 'X-API-Key'])
    preflight.headers['Access-Control-Allow-Methods'] = ', '.join(allowed_methods)
    preflight.headers['Access-Control-Allow-Headers'] = ', '.join(allowed_headers)
    # Preflight result may be cached by the client for one hour
    preflight.headers['Access-Control-Max-Age'] = '3600'

    return preflight

# Initialise security when this module is loaded
init_app_security()

# Module-level instances, created lazily by get_db() / get_extractor() / get_cleaner()
db = None
extractor = None
cleaner = None


def get_db():
    """Return the shared Database instance, creating and initialising it on first use."""
    global db
    if db is not None:
        return db
    db = Database()
    db.init_database()
    return db


def get_extractor():
    """Return the shared TranscriptExtractor instance, creating it on first use."""
    global extractor
    if extractor is not None:
        return extractor
    extractor = TranscriptExtractor()
    return extractor


def get_cleaner():
    """Return the shared TranscriptCleaner instance, creating it on first use."""
    global cleaner
    if cleaner is not None:
        return cleaner
    cleaner = TranscriptCleaner()
    return cleaner


def _is_youtube_url(url: str) -> bool:
    """Return True when *url* (with or without a scheme) points at youtube.com or a subdomain."""
    candidate = url if '://' in url else f"https://{url.lstrip('/')}"
    try:
        host = (urlparse(candidate).hostname or '').lower()
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. a broken IPv6 literal)
        return False
    return host == 'youtube.com' or host.endswith('.youtube.com')


def normalize_channel_id(channel_id: Optional[str] = None,
                        channel: Optional[str] = None,
                        channel_url: Optional[str] = None) -> Optional[str]:
    """
    Resolve a channel reference in any supported form to a validated channel ID.

    The inputs are tried in order (channel_id, channel, channel_url); the
    first one that yields a candidate wins. The candidate is then checked with
    the security manager's ``validate_channel_id``.

    Args:
        channel_id: Direct channel ID (``UC...``, 24 characters) or a
            ``youtube.com/channel/<id>`` URL.
        channel: Channel handle, with or without the leading ``@``.
        channel_url: Full YouTube channel URL, a bare ``@handle``, or a bare
            channel ID.

    Returns:
        The validated channel ID, or None when nothing could be resolved or
        the candidate failed validation.
    """
    security = get_security_manager()
    normalized_id = None

    # 1) Direct channel ID, or a /channel/ URL passed in the channel_id slot
    if channel_id:
        if channel_id.startswith('UC') and len(channel_id) == 24:
            normalized_id = channel_id
        elif 'youtube.com/channel/' in channel_id:
            normalized_id = channel_id.split('/channel/')[-1].split('?')[0].split('/')[0]

    # 2) Channel handle (@username); the leading @ is optional
    if not normalized_id and channel:
        handle = channel.strip()
        if not handle.startswith('@'):
            handle = f"@{handle}"
        # Skip the lookup when nothing is left besides the @ sign
        if handle.strip('@'):
            handle_url = f"https://www.youtube.com/{handle}"
            logger.info(f"[NORMALIZE] Channel handle URL oluşturuldu: {handle_url}")
            normalized_id = get_channel_id_from_handle(handle_url)

    # 3) Channel URL in its various shapes
    if not normalized_id and channel_url:
        channel_url = channel_url.strip()

        if '/@' in channel_url:
            # Handle URL (https://www.youtube.com/@username). The URL is
            # handed to the handle resolver as-is, so only YouTube hosts are
            # accepted; this keeps caller-supplied URLs for other hosts away
            # from the resolver.
            if _is_youtube_url(channel_url):
                normalized_id = get_channel_id_from_handle(channel_url)
        elif '/channel/' in channel_url:
            # Channel ID URL: take the path segment after /channel/; the
            # final validation step decides whether it is acceptable
            normalized_id = (
                channel_url.split('/channel/')[-1]
                .split('?')[0]
                .split('/')[0]
                .split('&')[0]
            )
        elif channel_url.startswith('@'):
            # Bare @username
            handle = channel_url.replace('@', '').strip()
            if handle:
                handle_url = f"https://www.youtube.com/@{handle}"
                normalized_id = get_channel_id_from_handle(handle_url)
        elif channel_url.startswith('UC') and len(channel_url) == 24:
            # Bare channel ID
            normalized_id = channel_url

    # Final gate: only IDs accepted by the security manager are returned
    if normalized_id and security.validate_channel_id(normalized_id):
        return normalized_id

    return None


def process_channel(channel_id: str, max_items: int = 50) -> dict:
    """
    Build the transcript feed data for one channel.

    Already-processed videos are served straight from the database when there
    are enough of them. Otherwise the channel's video list is fetched from
    RSS-Bridge, unknown videos are stored, and a limited number of pending
    videos get their transcripts extracted in small batches before the
    processed videos are collected.

    Args:
        channel_id: Validated YouTube channel ID.
        max_items: Maximum number of processed videos to return.

    Returns:
        dict with keys ``videos`` (list of processed video records),
        ``channel_id`` and ``count``.

    Raises:
        Exception: When the RSS-Bridge fetch fails (the original error is
            chained as the cause).
    """
    db = get_db()
    extractor = get_extractor()
    cleaner = get_cleaner()

    # FIRST: check what has already been processed for this channel
    existing_processed = db.get_processed_videos(limit=max_items, channel_id=channel_id)
    logger.info(f"[PROCESS] Channel {channel_id} için {len(existing_processed)} mevcut işlenmiş video bulundu")

    # Enough processed videos already: return them without any new work
    if len(existing_processed) >= max_items:
        logger.info(f"[PROCESS] ✅ Yeterli işlenmiş video var ({len(existing_processed)}), yeni işleme başlatılmıyor")
        return {
            'videos': existing_processed[:max_items],
            'channel_id': channel_id,
            'count': len(existing_processed[:max_items])
        }

    # Some, but not enough: keep them for the result and process more
    if len(existing_processed) > 0:
        logger.info(f"[PROCESS] ⚠️ Mevcut işlenmiş video var ama yeterli değil ({len(existing_processed)}/{max_items}), yeni işleme başlatılıyor")
        videos_to_return = existing_processed.copy()
    else:
        videos_to_return = []

    # Ask RSS-Bridge for more videos than needed (at least 50), because some
    # of them may turn out to have no transcript
    rss_bridge_limit = max(max_items * 2, 50)
    logger.info(f"[PROCESS] Channel {channel_id} için RSS-Bridge'den video listesi çekiliyor (limit: {rss_bridge_limit})")

    try:
        videos = fetch_videos_from_rss_bridge(
            base_url="https://rss-bridge.org/bridge01",
            channel_id=channel_id,
            format="Atom",
            max_items=rss_bridge_limit
        )
        logger.info(f"[PROCESS] RSS-Bridge'den {len(videos)} video alındı")
    except Exception as e:
        logger.error(f"[PROCESS] ❌ RSS-Bridge hatası: {type(e).__name__} - {str(e)}")
        # Chain the cause so the original traceback is not lost
        raise Exception(f"RSS-Bridge hatası: {e}") from e

    # Store videos that are not processed yet
    new_videos_count = 0
    for video in videos:
        video['channel_id'] = channel_id
        if not db.is_video_processed(video['video_id']):
            db.add_video(video)
            new_videos_count += 1

    if new_videos_count > 0:
        logger.info(f"[PROCESS] {new_videos_count} yeni video veritabanına eklendi")
    else:
        logger.debug("[PROCESS] Tüm videolar zaten veritabanında")

    # Pending videos are processed in small batches of 5 to lower the risk of
    # YouTube blocking the server's IP
    batch_size = 5
    processed_count = 0  # transcripts extracted during this call

    # All pending videos, filtered down to this channel
    all_pending_videos = [v for v in db.get_pending_videos() if v['channel_id'] == channel_id]
    logger.info(f"[PROCESS] Channel {channel_id} için {len(all_pending_videos)} bekleyen video bulundu (max_items: {max_items})")

    # Only as many new transcripts as are missing from the result
    remaining_needed = max_items - len(videos_to_return)

    total_batches = (len(all_pending_videos) + batch_size - 1) // batch_size
    current_batch = 0

    # Keep the request fast: one batch when nothing is processed yet, up to
    # three otherwise; the rest is left for later requests
    max_batches_to_process = 1 if len(videos_to_return) == 0 else min(3, total_batches)

    for batch_start in range(0, len(all_pending_videos), batch_size):
        if processed_count >= remaining_needed:
            logger.info(f"[PROCESS] Yeterli transcript işlendi ({processed_count}/{remaining_needed})")
            break

        if current_batch >= max_batches_to_process:
            logger.info(f"[PROCESS] İlk batch'ler işlendi ({current_batch}/{max_batches_to_process}), kalan işlemeler sonraki isteklerde yapılacak")
            break

        current_batch += 1
        batch_videos = all_pending_videos[batch_start:batch_start + batch_size]
        logger.info(f"[BATCH] Batch {current_batch}/{total_batches} başlatılıyor ({len(batch_videos)} video, Toplam işlenen: {processed_count}/{max_items})")

        batch_processed = 0
        batch_cached = 0
        batch_failed = 0

        for video in batch_videos:
            # Stop as soon as the missing amount is covered; the outer loop
            # uses the same bound, so no extra YouTube requests are made
            if processed_count >= remaining_needed:
                break

            video_id = video['video_id']
            # The stored title may be None; fall back before slicing
            video_title = (video.get('video_title') or 'N/A')[:50]

            # Skip videos whose transcript was cached within the last 3 days
            if db.is_transcript_cached(video_id, cache_days=3):
                logger.debug(f"[CACHE] Video {video_id} ({video_title}) transcript'i cache'de, atlanıyor")
                batch_cached += 1
                continue

            try:
                logger.info(f"[VIDEO] Video işleniyor: {video_id} - {video_title}")

                transcript = extractor.fetch_transcript(
                    video_id,
                    languages=['tr', 'en']
                )

                if transcript:
                    logger.debug(f"[VIDEO] Video {video_id} transcript'i temizleniyor...")
                    raw, clean = cleaner.clean_transcript(transcript, sentences_per_paragraph=3)

                    # Persist immediately so finished work survives a later failure.
                    # NOTE(review): language is stored as 'tr' even though 'en'
                    # is also requested above; confirm whether the extractor
                    # can report the language it actually returned.
                    db.update_video_transcript(
                        video_id,
                        raw,
                        clean,
                        status=1,
                        language='tr'
                    )
                    processed_count += 1
                    batch_processed += 1
                    logger.info(f"[VIDEO] ✅ Video {video_id} başarıyla işlendi ve kaydedildi ({processed_count}/{max_items})")
                else:
                    logger.warning(f"[VIDEO] ⚠️ Video {video_id} transcript'i alınamadı (None döndü)")
                    batch_failed += 1
                    db.mark_video_failed(video_id, "Transcript None döndü")
            except Exception as e:
                # One failing video must not abort the rest of the batch
                error_type = type(e).__name__
                error_msg = str(e)[:200]
                logger.error(f"[VIDEO] ❌ Video {video_id} işleme hatası: {error_type} - {error_msg}")
                db.mark_video_failed(video_id, str(e))
                batch_failed += 1

        # Batch summary
        logger.info(f"[BATCH] Batch {current_batch}/{total_batches} tamamlandı - İşlenen: {batch_processed}, Cache: {batch_cached}, Başarısız: {batch_failed}")

        # Pause only when another batch will actually run in this call;
        # sleeping before a batch that the limits above will skip would just
        # delay the response
        another_batch_follows = (
            processed_count < remaining_needed
            and batch_start + batch_size < len(all_pending_videos)
            and current_batch < max_batches_to_process
        )
        if another_batch_follows:
            wait_time = 2 + random.uniform(0, 3)  # 2-5 seconds
            logger.info(f"[BATCH] Batch'ler arası kısa bekleme: {wait_time:.1f} saniye (hızlı yanıt için)")
            time.sleep(wait_time)

    # Re-read processed videos so the ones finished above are included
    newly_processed = db.get_processed_videos(
        limit=max_items,
        channel_id=channel_id
    )

    # Merge previously processed and freshly read videos without duplicates
    all_processed_videos = videos_to_return.copy()
    existing_ids = {v['video_id'] for v in all_processed_videos}

    for video in newly_processed:
        if video['video_id'] not in existing_ids and len(all_processed_videos) < max_items:
            all_processed_videos.append(video)
            existing_ids.add(video['video_id'])

    # Newest first
    all_processed_videos.sort(
        key=lambda x: x.get('published_at_utc', '') or '',
        reverse=True
    )

    logger.info(f"[PROCESS] ✅ Channel {channel_id} işleme tamamlandı - {len(all_processed_videos)} işlenmiş video döndürülüyor (Mevcut: {len(videos_to_return)}, Yeni işlenen: {len(newly_processed)})")

    return {
        'videos': all_processed_videos[:max_items],
        'channel_id': channel_id,
        'count': len(all_processed_videos[:max_items])
    }


@app.route('/', methods=['GET'])
@require_api_key  # API key required
@validate_input  # Input validation
def generate_feed():
    """
    Serve a transcript feed for a YouTube channel (RSS-Bridge-style URL template).

    Query parameters:
        channel_id: YouTube channel ID (``UC...``).
        channel: Channel handle, with or without the leading ``@``.
        channel_url: Full YouTube channel URL; may arrive percent-encoded
            more than once.
        format: ``Atom`` (default) or ``Rss``.
        max_items: Number of transcripts wanted; default 10, clamped to 1..100.

    Examples:
        - /?channel_id=UC9h8BDcXwkhZtnqoQJ7PggA&format=Atom
        - /?channel=@tavakfi&format=Atom
        - /?channel_url=https://www.youtube.com/@tavakfi&format=Atom

    Returns:
        An Atom or RSS response on success; otherwise a JSON error body with
        status 400 (channel could not be resolved), 404 (no processed videos
        yet) or 500 (processing failed).
    """
    # User-Agent sniffing to detect feed readers (used for logging and a
    # more detailed 400 body)
    user_agent = request.headers.get('User-Agent', '')
    is_rss_reader = any(keyword in user_agent.lower() for keyword in [
        'rss', 'feed', 'reader', 'aggregator', 'feedly', 'newsblur',
        'inoreader', 'theoldreader', 'netnewswire', 'reeder'
    ])

    channel_id_raw = request.args.get('channel_id')
    channel_raw = request.args.get('channel')  # @username or username
    channel_url_raw = request.args.get('channel_url')

    # unquote() returns its input unchanged when there is nothing to decode
    channel_id = unquote(channel_id_raw) if channel_id_raw else None

    channel = None
    if channel_raw:
        # Drop the leading @; normalize_channel_id adds it back
        channel = unquote(channel_raw).strip().lstrip('@')

    # request.args is already decoded once, but a URL nested inside a URL may
    # have been encoded several times; decode up to three more rounds
    channel_url = None
    if channel_url_raw:
        channel_url = channel_url_raw
        max_decode_attempts = 3
        for _ in range(max_decode_attempts):
            if '%' not in channel_url:
                break
            decoded = unquote(channel_url)
            if decoded == channel_url:
                break
            channel_url = decoded

        # Add the scheme (and host for a bare @handle) when it is missing
        if channel_url and not channel_url.startswith(('http://', 'https://')):
            if channel_url.startswith(('www.youtube.com', 'youtube.com')):
                channel_url = 'https://' + channel_url
            elif channel_url.startswith('@'):
                channel_url = 'https://www.youtube.com/' + channel_url

    format_type = request.args.get('format', 'Atom').lower()  # Atom or Rss
    try:
        # Clamp to 1..100: a zero or negative value would otherwise reach
        # process_channel and be used as a slice bound there
        max_items = max(1, min(int(request.args.get('max_items', 10)), 100))
    except (ValueError, TypeError):
        max_items = 10

    # Request logging. The API key may be passed as a query parameter, so it
    # is masked before the URL and query string are written to the log.
    safe_query = '&'.join(
        f"{key}=***" if key.lower() == 'api_key' else f"{key}={value}"
        for key, value in request.args.items(multi=True)
    )
    safe_url = f"{request.base_url}?{safe_query}" if safe_query else request.base_url
    logger.info(f"[REQUEST] Tarayıcı isteği - Raw params: channel_id={channel_id_raw}, channel={channel_raw}, channel_url={channel_url_raw[:100] if channel_url_raw else None}")
    logger.info(f"[REQUEST] Processed params: channel_id={channel_id}, channel={channel}, channel_url={channel_url[:100] if channel_url else None}")
    logger.info(f"[REQUEST] Full URL: {safe_url}")
    logger.info(f"[REQUEST] Query string: {safe_query or None}")

    if is_rss_reader:
        logger.info(f"[RSS_READER] RSS okuyucu tespit edildi: {user_agent[:100]}")

    # Resolve whichever reference was given to a validated channel ID
    try:
        normalized_channel_id = normalize_channel_id(
            channel_id=channel_id,
            channel=channel,
            channel_url=channel_url
        )
        logger.info(f"[REQUEST] Normalized channel_id: {normalized_channel_id}")
    except Exception as e:
        logger.error(f"[REQUEST] ❌ Channel ID normalize hatası: {type(e).__name__} - {str(e)}")
        normalized_channel_id = None

    if not normalized_channel_id:
        error_msg = 'Channel ID bulunamadı veya geçersiz format'
        if channel_url:
            error_msg += f'. URL: {channel_url}'
        elif channel:
            error_msg += f'. Handle: {channel}'
        elif channel_id:
            error_msg += f'. Channel ID: {channel_id}'

        logger.warning(f"[REQUEST] ❌ Channel ID bulunamadı - Raw: channel_id={channel_id_raw}, channel={channel_raw}, channel_url={channel_url_raw}")
        logger.warning(f"[REQUEST] ❌ Processed: channel_id={channel_id}, channel={channel}, channel_url={channel_url}")

        # Feed readers get a more explanatory error body
        if is_rss_reader:
            logger.warning(f"[RSS_READER] Channel ID bulunamadı - URL: {channel_url or channel or channel_id}")
            return jsonify({
                'error': error_msg,
                'message': 'RSS okuyucunuzdan feed eklerken lütfen geçerli bir YouTube kanal URL\'si kullanın',
                'received_params': {
                    'channel_id': channel_id_raw,
                    'channel': channel_raw,
                    'channel_url': channel_url_raw,
                    'decoded_channel_url': channel_url
                },
                'example_url': f'{request.url_root}?channel_url=https://www.youtube.com/@username&api_key=YOUR_API_KEY&format=Atom',
                'usage': {
                    'channel_id': 'UC... (YouTube Channel ID, 24 karakter)',
                    'channel': '@username veya username',
                    'channel_url': 'https://www.youtube.com/@username veya https://www.youtube.com/channel/UC...',
                    'format': 'Atom veya Rss (varsayılan: Atom)',
                    'max_items': 'Maksimum transcript sayısı (varsayılan: 10, maksimum: 100)',
                    'api_key': 'API key query parametresi olarak eklenmelidir (RSS okuyucular header gönderemez)'
                }
            }), 400

        return jsonify({
            'error': error_msg,
            'received_params': {
                'channel_id': channel_id_raw,
                'channel': channel_raw,
                'channel_url': channel_url_raw,
                'decoded_channel_url': channel_url
            },
            'message': 'YouTube Channel ID UC ile başlayan 24 karakter olmalı (sadece alfanumerik ve alt çizgi)',
            'usage': {
                'channel_id': 'UC... (YouTube Channel ID, 24 karakter)',
                'channel': '@username veya username',
                'channel_url': 'https://www.youtube.com/@username veya https://www.youtube.com/channel/UC...',
                'format': 'Atom veya Rss (varsayılan: Atom)',
                'max_items': 'Maksimum transcript sayısı (varsayılan: 10, maksimum: 100, 20\'şer batch\'ler halinde işlenir)'
            }
        }), 400

    try:
        result = process_channel(normalized_channel_id, max_items=max_items)

        if not result['videos']:
            return jsonify({
                'error': 'Henüz işlenmiş video yok',
                'channel_id': normalized_channel_id,
                'message': 'Transcript\'ler arka planda işleniyor. Lütfen birkaç dakika sonra tekrar deneyin.',
                'note': 'YouTube IP blocking nedeniyle transcript çıkarımı yavaş olabilir. İlk istekte birkaç dakika bekleyin.'
            }), 404

        # Build the feed
        channel_info = {
            'id': normalized_channel_id,
            'title': f"YouTube Transcript Feed - {normalized_channel_id}",
            'link': f"https://www.youtube.com/channel/{normalized_channel_id}",
            'description': f'Full-text transcript RSS feed for channel {normalized_channel_id}',
            'language': 'en'
        }

        generator = RSSGenerator(channel_info)

        for video in result['videos']:
            generator.add_video_entry(video)

        response_headers = {}
        if hasattr(g, 'rate_limit_remaining'):
            response_headers['X-RateLimit-Remaining'] = str(g.rate_limit_remaining)

        # Anything other than "rss" is served as Atom
        if format_type == 'rss':
            rss_content = generator.generate_rss_string()
            response_headers['Content-Type'] = 'application/rss+xml; charset=utf-8'
            return Response(
                rss_content,
                mimetype='application/rss+xml',
                headers=response_headers
            )
        else:  # Atom
            atom_content = generator.generate_atom_string()
            response_headers['Content-Type'] = 'application/atom+xml; charset=utf-8'
            return Response(
                atom_content,
                mimetype='application/atom+xml',
                headers=response_headers
            )

    except Exception as e:
        # Top-level boundary: record the traceback instead of dropping it,
        # then report the failure to the client
        logger.exception(f"[REQUEST] ❌ Feed oluşturma hatası: {type(e).__name__} - {str(e)[:200]}")
        return jsonify({
            'error': str(e),
            'channel_id': normalized_channel_id
        }), 500


@app.route('/health', methods=['GET'])
@rate_limit(limit_per_minute=120)  # higher limit for health checks
def health():
    """Health check endpoint; responds with a static JSON status payload."""
    status_payload = {'status': 'ok', 'service': 'YouTube Transcript RSS Feed'}
    return jsonify(status_payload)


@app.route('/info', methods=['GET'])
@require_api_key  # API key required
def info():
    """Describe the API: service name, version, endpoints, parameters and example URLs."""
    endpoints = {
        '/': 'RSS Feed Generator',
        '/health': 'Health Check',
        '/info': 'API Info'
    }
    usage = {
        'channel_id': 'UC... (YouTube Channel ID)',
        'channel': '@username veya username',
        'channel_url': 'Full YouTube channel URL',
        'format': 'Atom veya Rss (varsayılan: Atom)',
        'max_items': 'Her istekte işlenecek maksimum transcript sayısı (varsayılan: 10, maksimum: 100, 20\'şer batch\'ler halinde işlenir)'
    }
    examples = [
        '/?channel_id=UC9h8BDcXwkhZtnqoQJ7PggA&format=Atom',
        '/?channel=@tavakfi&format=Rss',
        '/?channel_url=https://www.youtube.com/@tavakfi&format=Atom&max_items=50'
    ]
    api_description = {
        'service': 'YouTube Transcript RSS Feed Generator',
        'version': '1.0.0',
        'endpoints': endpoints,
        'usage': usage,
        'examples': examples
    }
    return jsonify(api_description)


if __name__ == '__main__':
    # The Werkzeug interactive debugger lets anyone who can reach the server
    # execute arbitrary Python code, so it must not be enabled by default
    # while binding to all interfaces. Opt in explicitly for local
    # development by setting FLASK_DEBUG=1 (also accepts true/yes/on).
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								"""
 								Flask web server - RSS-Bridge benzeri URL template sistemi
 								"""
-												Transkript ip blocked

											
										
										
											2025-11-13 03:52:26 +03:00
+								from flask import Flask, request, Response, jsonify, g
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								from typing import Optional
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								from urllib.parse import unquote, urlparse
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								import sys
-												api key and security

											
										
										
											2025-11-13 03:40:05 +03:00
+								import os
 								import yaml
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								import time
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								import logging
-												yt bot protection

											
										
										
											2025-11-13 05:31:43 +03:00
+								import random
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								from pathlib import Path
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								# Logger oluştur
 								logger = logging.getLogger(__name__)
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								sys.path.insert(0, str(Path(__file__).parent.parent))
 								from src.database import Database
 								from src.video_fetcher import fetch_videos_from_rss_bridge, get_channel_id_from_handle, extract_video_id
 								from src.transcript_extractor import TranscriptExtractor
 								from src.transcript_cleaner import TranscriptCleaner
 								from src.rss_generator import RSSGenerator
-												api key and security

											
										
										
											2025-11-13 03:40:05 +03:00
+								from src.security import (
 								    init_security, get_security_manager,
 								    require_api_key, rate_limit, validate_input
 								)
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
 								app = Flask(__name__)
-												api key and security

											
										
										
											2025-11-13 03:40:05 +03:00
+								# Security config yükle
 								_security_config = None
 								def load_security_config():
 								    """Security config'i yükle"""
 								    global _security_config
 								    if _security_config is None:
 								        config_path = Path(__file__).parent.parent / 'config' / 'security.yaml'
 								        if config_path.exists():
 								            with open(config_path, 'r', encoding='utf-8') as f:
 								                _security_config = yaml.safe_load(f).get('security', {})
 								        else:
 								            _security_config = {}
 								    return _security_config
 								# Security manager'ı initialize et
 								def init_app_security():
 								    """Security manager'ı uygulama başlangıcında initialize et"""
 								    config = load_security_config()
 								    api_keys = config.get('api_keys', {})
 								    default_rate_limit = config.get('default_rate_limit', 60)
 								    init_security(api_keys, default_rate_limit)
 								# Security headers ve CORS middleware
 								@app.after_request
 								def add_security_headers(response):
 								    """Security header'ları ekle"""
 								    config = load_security_config()
 								    headers = config.get('security_headers', {})
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    # RSS feed'ler için Content-Security-Policy'yi daha esnek yap
 								    # RSS okuyucular ve tarayıcılar için sorun çıkarmasın
 								    is_feed_response = (
 								        'application/atom+xml' in response.content_type or
 								        'application/rss+xml' in response.content_type or
 								        'application/xml' in response.content_type or
 								        'text/xml' in response.content_type
 								    )
-												api key and security

											
										
										
											2025-11-13 03:40:05 +03:00
+								    for header, value in headers.items():
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								        # RSS feed'ler için CSP'yi atla veya daha esnek yap
 								        if header == 'Content-Security-Policy' and is_feed_response:
 								            # RSS feed'ler için CSP'yi daha esnek yap
 								            response.headers[header] = "default-src 'self' 'unsafe-inline' data: blob: *"
 								        else:
 								            response.headers[header] = value
-												api key and security

											
										
										
											2025-11-13 03:40:05 +03:00
 								    # CORS headers
 								    cors_config = config.get('cors', {})
 								    if cors_config.get('enabled', True):
 								        origins = cors_config.get('allowed_origins', ['*'])
 								        if '*' in origins:
 								            response.headers['Access-Control-Allow-Origin'] = '*'
 								        else:
 								            origin = request.headers.get('Origin')
 								            if origin in origins:
 								                response.headers['Access-Control-Allow-Origin'] = origin
 								        response.headers['Access-Control-Allow-Methods'] = ', '.join(
 								            cors_config.get('allowed_methods', ['GET', 'OPTIONS'])
 								        )
 								        response.headers['Access-Control-Allow-Headers'] = ', '.join(
 								            cors_config.get('allowed_headers', ['Content-Type', 'X-API-Key'])
 								        )
 								    # Rate limit bilgisini header'a ekle
 								    if hasattr(g, 'rate_limit_remaining'):
 								        response.headers['X-RateLimit-Remaining'] = str(g.rate_limit_remaining)
 								    return response
 								# OPTIONS handler for CORS
 								@app.route('/', methods=['OPTIONS'])
 								@app.route('/<path:path>', methods=['OPTIONS'])
 								def handle_options(path=None):
 								    """CORS preflight request handler"""
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    config = load_security_config()
 								    cors_config = config.get('cors', {})
 								    response = Response(status=200)
 								    if cors_config.get('enabled', True):
 								        origins = cors_config.get('allowed_origins', ['*'])
 								        if '*' in origins:
 								            response.headers['Access-Control-Allow-Origin'] = '*'
 								        else:
 								            origin = request.headers.get('Origin')
 								            if origin in origins:
 								                response.headers['Access-Control-Allow-Origin'] = origin
 								        response.headers['Access-Control-Allow-Methods'] = ', '.join(
 								            cors_config.get('allowed_methods', ['GET', 'OPTIONS'])
 								        )
 								        response.headers['Access-Control-Allow-Headers'] = ', '.join(
 								            cors_config.get('allowed_headers', ['Content-Type', 'X-API-Key'])
 								        )
 								        response.headers['Access-Control-Max-Age'] = '3600'
 								    return response
-												api key and security

											
										
										
											2025-11-13 03:40:05 +03:00
 								# Uygulama başlangıcında security'yi initialize et
 								init_app_security()
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								# Global instances (lazy loading)
 								db = None
 								extractor = None
 								cleaner = None
 								def get_db():
 								    """Database instance'ı al (singleton)"""
 								    global db
 								    if db is None:
 								        db = Database()
 								        db.init_database()
 								    return db
 								def get_extractor():
 								    """Transcript extractor instance'ı al"""
 								    global extractor
 								    if extractor is None:
 								        extractor = TranscriptExtractor()
 								    return extractor
 								def get_cleaner():
 								    """Transcript cleaner instance'ı al"""
 								    global cleaner
 								    if cleaner is None:
 								        cleaner = TranscriptCleaner()
 								    return cleaner
 								def normalize_channel_id(channel_id: Optional[str] = None,
 								                        channel: Optional[str] = None,
 								                        channel_url: Optional[str] = None) -> Optional[str]:
 								    """
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    Farklı formatlardan channel ID'yi normalize et ve validate et
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
 								    Args:
 								        channel_id: Direkt Channel ID (UC...)
 								        channel: Channel handle (@username) veya username
 								        channel_url: Full YouTube channel URL
 								    Returns:
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								        Normalize edilmiş ve validate edilmiş Channel ID veya None
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								    """
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    security = get_security_manager()
 								    normalized_id = None
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								    # Direkt Channel ID varsa
 								    if channel_id:
 								        if channel_id.startswith('UC') and len(channel_id) == 24:
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								            normalized_id = channel_id
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								        # Eğer URL formatında ise parse et
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								        elif 'youtube.com/channel/' in channel_id:
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								            parts = channel_id.split('/channel/')
 								            if len(parts) > 1:
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								                normalized_id = parts[-1].split('?')[0].split('/')[0]
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
 								    # Channel handle (@username)
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    if not normalized_id and channel:
 								        # Channel parametresini normalize et (@ işareti olabilir veya olmayabilir)
 								        channel = channel.strip()
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								        if not channel.startswith('@'):
 								            channel = f"@{channel}"
 								        handle_url = f"https://www.youtube.com/{channel}"
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								        logger.info(f"[NORMALIZE] Channel handle URL oluşturuldu: {handle_url}")
 								        normalized_id = get_channel_id_from_handle(handle_url)
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
 								    # Channel URL
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    if not normalized_id and channel_url:
 								        # URL'yi temizle ve normalize et
 								        channel_url = channel_url.strip()
 								        # Handle URL (@username formatı)
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								        if '/@' in channel_url:
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								            # URL'den handle'ı çıkar
 								            if '/@' in channel_url:
 								                # https://www.youtube.com/@username formatı
 								                normalized_id = get_channel_id_from_handle(channel_url)
 								            else:
 								                # Sadece @username formatı
 								                handle = channel_url.replace('@', '').strip()
 								                if handle:
 								                    handle_url = f"https://www.youtube.com/@{handle}"
 								                    normalized_id = get_channel_id_from_handle(handle_url)
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								        # Channel ID URL
 								        elif '/channel/' in channel_url:
 								            parts = channel_url.split('/channel/')
 								            if len(parts) > 1:
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								                channel_id_part = parts[-1].split('?')[0].split('/')[0].split('&')[0]
 								                # Eğer UC ile başlıyorsa ve 24 karakter ise, direkt kullan
 								                if channel_id_part.startswith('UC') and len(channel_id_part) == 24:
 								                    normalized_id = channel_id_part
 								                else:
 								                    # Parse etmeye çalış
 								                    normalized_id = channel_id_part
 								        # Sadece handle (@username) formatı
 								        elif channel_url.startswith('@'):
 								            handle = channel_url.replace('@', '').strip()
 								            if handle:
 								                handle_url = f"https://www.youtube.com/@{handle}"
 								                normalized_id = get_channel_id_from_handle(handle_url)
 								        # Direkt channel ID formatı (UC...)
 								        elif channel_url.startswith('UC') and len(channel_url) == 24:
 								            normalized_id = channel_url
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    # Validate: Channel ID formatını kontrol et
 								    if normalized_id and security.validate_channel_id(normalized_id):
 								        return normalized_id
 								    # Geçersiz format
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								    return None
 								def process_channel(channel_id: str, max_items: int = 50) -> dict:
 								    """
 								    Kanal için transcript feed'i oluştur
 								    Returns:
 								        RSS feed string ve metadata
 								    """
 								    db = get_db()
 								    extractor = get_extractor()
 								    cleaner = get_cleaner()
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    # ÖNCE: Mevcut işlenmiş videoları kontrol et
 								    existing_processed = db.get_processed_videos(limit=max_items, channel_id=channel_id)
 								    logger.info(f"[PROCESS] Channel {channel_id} için {len(existing_processed)} mevcut işlenmiş video bulundu")
 								    # Eğer yeterli sayıda işlenmiş video varsa, onları hemen döndür
 								    if len(existing_processed) >= max_items:
 								        logger.info(f"[PROCESS] ✅ Yeterli işlenmiş video var ({len(existing_processed)}), yeni işleme başlatılmıyor")
 								        return {
 								            'videos': existing_processed[:max_items],
 								            'channel_id': channel_id,
 								            'count': len(existing_processed[:max_items])
 								        }
 								    # Eğer mevcut işlenmiş videolar varsa ama yeterli değilse, onları döndür ve yeni işlemeleri başlat
 								    # Ancak sadece ilk batch'i işle (hızlı yanıt için)
 								    if len(existing_processed) > 0:
 								        logger.info(f"[PROCESS] ⚠️ Mevcut işlenmiş video var ama yeterli değil ({len(existing_processed)}/{max_items}), yeni işleme başlatılıyor")
 								        # Mevcut videoları döndürmek için sakla
 								        videos_to_return = existing_processed.copy()
 								    else:
 								        videos_to_return = []
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								    # RSS-Bridge'den videoları çek (max_items'ın 2 katı kadar çek, böylece yeterli video olur)
 								    # RSS-Bridge'den daha fazla video çekiyoruz çünkü bazıları transcript'siz olabilir
 								    rss_bridge_limit = max(max_items * 2, 50)  # En az 50 video çek
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								    logger.info(f"[PROCESS] Channel {channel_id} için RSS-Bridge'den video listesi çekiliyor (limit: {rss_bridge_limit})")
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								    try:
 								        videos = fetch_videos_from_rss_bridge(
 								            base_url="https://rss-bridge.org/bridge01",
 								            channel_id=channel_id,
 								            format="Atom",
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								            max_items=rss_bridge_limit
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								        )
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								        logger.info(f"[PROCESS] RSS-Bridge'den {len(videos)} video alındı")
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								    except Exception as e:
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								        logger.error(f"[PROCESS] ❌ RSS-Bridge hatası: {type(e).__name__} - {str(e)}")
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								        raise Exception(f"RSS-Bridge hatası: {e}")
 								    # Yeni videoları veritabanına ekle
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								    new_videos_count = 0
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								    for video in videos:
 								        video['channel_id'] = channel_id
 								        if not db.is_video_processed(video['video_id']):
 								            db.add_video(video)
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								            new_videos_count += 1
 								    if new_videos_count > 0:
 								        logger.info(f"[PROCESS] {new_videos_count} yeni video veritabanına eklendi")
 								    else:
 								        logger.debug(f"[PROCESS] Tüm videolar zaten veritabanında")
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
-												yt bot protection

											
										
										
											2025-11-13 05:31:43 +03:00
+								    # Bekleyen videoları işle (max_items kadar, küçük batch'ler halinde)
 								    # YouTube IP blocking'i önlemek için her batch'te sadece 5 video işlenir
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								    # max_items: Her istekte kaç video transcript işleneceği (maksimum 100)
-												yt bot protection

											
										
										
											2025-11-13 05:31:43 +03:00
+								    batch_size = 5  # Her batch'te işlenecek video sayısı (küçük batch = daha az blocking riski)
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								    processed_count = 0  # İşlenen transcript sayısı
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								    # Tüm bekleyen videoları al (channel_id'ye göre filtrele)
 								    all_pending_videos = [v for v in db.get_pending_videos() if v['channel_id'] == channel_id]
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								    logger.info(f"[PROCESS] Channel {channel_id} için {len(all_pending_videos)} bekleyen video bulundu (max_items: {max_items})")
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    # Eğer mevcut işlenmiş videolar varsa, sadece eksik kadar işle
 								    remaining_needed = max_items - len(videos_to_return)
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								    # max_items kadar transcript işlenene kadar batch'ler halinde işle
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								    total_batches = (len(all_pending_videos) + batch_size - 1) // batch_size
 								    current_batch = 0
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    # İlk istek için sadece ilk batch'i işle (hızlı yanıt için)
 								    # Sonraki isteklerde daha fazla işlenmiş video olacak
 								    max_batches_to_process = 1 if len(videos_to_return) == 0 else min(3, total_batches)  # İlk istekte 1 batch, sonra 3 batch
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								    for batch_start in range(0, len(all_pending_videos), batch_size):
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								        if processed_count >= remaining_needed:
 								            logger.info(f"[PROCESS] Yeterli transcript işlendi ({processed_count}/{remaining_needed})")
 								            break
 								        if current_batch >= max_batches_to_process:
 								            logger.info(f"[PROCESS] İlk batch'ler işlendi ({current_batch}/{max_batches_to_process}), kalan işlemeler sonraki isteklerde yapılacak")
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								            break
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
 								        current_batch += 1
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								        batch_videos = all_pending_videos[batch_start:batch_start + batch_size]
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								        logger.info(f"[BATCH] Batch {current_batch}/{total_batches} başlatılıyor ({len(batch_videos)} video, Toplam işlenen: {processed_count}/{max_items})")
 								        batch_processed = 0
 								        batch_cached = 0
 								        batch_failed = 0
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
 								        for video in batch_videos:
 								            if processed_count >= max_items:
 								                break
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								            video_id = video['video_id']
 								            video_title = video.get('video_title', 'N/A')[:50]
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								            # Cache kontrolü: 3 gün içinde işlenmiş transcript varsa atla
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								            if db.is_transcript_cached(video_id, cache_days=3):
 								                logger.debug(f"[CACHE] Video {video_id} ({video_title}) transcript'i cache'de, atlanıyor")
 								                batch_cached += 1
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								                continue
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								            try:
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								                logger.info(f"[VIDEO] Video işleniyor: {video_id} - {video_title}")
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								                # Transcript çıkar
 								                transcript = extractor.fetch_transcript(
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								                    video_id,
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								                    languages=['tr', 'en']
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								                )
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
 								                if transcript:
 								                    # Transcript temizle
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								                    logger.debug(f"[VIDEO] Video {video_id} transcript'i temizleniyor...")
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								                    raw, clean = cleaner.clean_transcript(transcript, sentences_per_paragraph=3)
 								                    # Veritabanına kaydet (her batch hemen kaydedilir)
 								                    db.update_video_transcript(
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								                        video_id,
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								                        raw,
 								                        clean,
 								                        status=1,
 								                        language='tr'
 								                    )
 								                    processed_count += 1
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								                    batch_processed += 1
 								                    logger.info(f"[VIDEO] ✅ Video {video_id} başarıyla işlendi ve kaydedildi ({processed_count}/{max_items})")
 								                else:
 								                    logger.warning(f"[VIDEO] ⚠️ Video {video_id} transcript'i alınamadı (None döndü)")
 								                    batch_failed += 1
 								                    db.mark_video_failed(video_id, "Transcript None döndü")
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
+								            except Exception as e:
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								                error_type = type(e).__name__
 								                error_msg = str(e)[:200]
 								                logger.error(f"[VIDEO] ❌ Video {video_id} işleme hatası: {error_type} - {error_msg}")
 								                db.mark_video_failed(video_id, str(e))
 								                batch_failed += 1
 								        # Batch özeti
 								        logger.info(f"[BATCH] Batch {current_batch}/{total_batches} tamamlandı - İşlenen: {batch_processed}, Cache: {batch_cached}, Başarısız: {batch_failed}")
-												batch batch

											
										
										
											2025-11-13 04:12:05 +03:00
-												yt bot protection

											
										
										
											2025-11-13 05:31:43 +03:00
+								        # Batch tamamlandı, uzun bekleme (YouTube IP blocking önleme için)
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								        # İlk batch'ler için daha kısa bekleme (hızlı yanıt için), sonraki batch'ler için uzun bekleme
 								        if processed_count < remaining_needed and batch_start + batch_size < len(all_pending_videos):
 								            # İlk batch'ler için kısa bekleme (2-5 saniye), sonraki batch'ler için uzun bekleme (60-90 saniye)
 								            if current_batch <= max_batches_to_process:
 								                wait_time = 2 + random.uniform(0, 3)  # 2-5 saniye (hızlı yanıt için)
 								                logger.info(f"[BATCH] Batch'ler arası kısa bekleme: {wait_time:.1f} saniye (hızlı yanıt için)")
 								            else:
 								                wait_time = 60 + random.uniform(0, 30)  # 60-90 saniye random (human-like)
 								                logger.info(f"[BATCH] Batch'ler arası uzun bekleme: {wait_time:.1f} saniye ({wait_time/60:.1f} dakika) - YouTube IP blocking önleme")
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
+								            time.sleep(wait_time)
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    # İşlenmiş videoları getir (yeni işlenenler)
 								    newly_processed = db.get_processed_videos(
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								        limit=max_items,
 								        channel_id=channel_id
 								    )
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								    # Mevcut videoları ve yeni işlenen videoları birleştir (duplicate kontrolü ile)
 								    all_processed_videos = videos_to_return.copy()  # Önce mevcut videoları ekle
 								    existing_ids = {v['video_id'] for v in all_processed_videos}
 								    # Yeni işlenen videoları ekle
 								    for video in newly_processed:
 								        if video['video_id'] not in existing_ids and len(all_processed_videos) < max_items:
 								            all_processed_videos.append(video)
 								    # Tarihe göre sırala (en yeni önce)
 								    all_processed_videos.sort(
 								        key=lambda x: x.get('published_at_utc', '') or '',
 								        reverse=True
 								    )
 								    logger.info(f"[PROCESS] ✅ Channel {channel_id} işleme tamamlandı - {len(all_processed_videos)} işlenmiş video döndürülüyor (Mevcut: {len(videos_to_return)}, Yeni işlenen: {len(newly_processed)})")
-												log all

											
										
										
											2025-11-13 05:16:12 +03:00
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								    return {
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								        'videos': all_processed_videos[:max_items],
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								        'channel_id': channel_id,
-												api issues

											
										
										
											2025-11-13 08:51:55 +03:00
+								        'count': len(all_processed_videos[:max_items])
-												first commit

											
										
										
											2025-11-13 03:25:21 +03:00
+								    }
 								def _feed_decode_param(raw):
 								    """Return the percent-decoded query value, or None when it is missing or empty."""
 								    if not raw:
 								        return None
 								    # unquote() returns strings without '%' unchanged, so no pre-check is needed.
 								    return unquote(raw)


 								def _feed_decode_channel_url(raw):
 								    """Decode a possibly multiply-encoded ``channel_url`` and add a missing scheme.

 								    Returns None when ``raw`` is missing or empty.
 								    """
 								    if not raw:
 								        return None
 								    url = raw
 								    # The framework already decoded the value once, but because it embeds
 								    # another URL it may have been encoded several times; peel off at most
 								    # three additional layers.
 								    for _ in range(3):
 								        if '%' not in url:
 								            break
 								        decoded = unquote(url)
 								        if decoded == url:  # nothing left to decode
 								            break
 								        url = decoded
 								    # Add a scheme for the scheme-less forms that are recognised.
 								    if not url.startswith(('http://', 'https://')):
 								        if url.startswith(('www.youtube.com', 'youtube.com')):
 								            url = 'https://' + url
 								        elif url.startswith('@'):
 								            url = 'https://www.youtube.com/' + url
 								    return url


 								def _feed_parse_max_items(raw, default=10, upper=100):
 								    """Parse ``max_items``: ``default`` on unparsable input, otherwise clamped to 1..upper."""
 								    try:
 								        value = int(raw)
 								    except (ValueError, TypeError):
 								        return default
 								    # Without the lower bound, 0 or a negative value would reach the
 								    # downstream slicing and produce empty or truncated results.
 								    return max(1, min(value, upper))


 								def _feed_redact_api_key(text):
 								    """Mask the value of the ``api_key`` query parameter inside ``text`` for logging."""
 								    import re  # local import: not guaranteed to be imported at module level
 								    return re.sub(r'((?:^|[?&])api_key=)[^&#]*', r'\1***', text)


 								def _feed_channel_error_response(is_rss_reader, channel_id_raw, channel_raw,
 								                                 channel_url_raw, channel_id, channel, channel_url):
 								    """Log and build the HTTP 400 response used when no channel ID could be resolved."""
 								    error_msg = 'Channel ID bulunamadı veya geçersiz format'
 								    if channel_url:
 								        error_msg += f'. URL: {channel_url}'
 								    elif channel:
 								        error_msg += f'. Handle: {channel}'
 								    elif channel_id:
 								        error_msg += f'. Channel ID: {channel_id}'
 								    logger.warning(f"[REQUEST] ❌ Channel ID bulunamadı - Raw: channel_id={channel_id_raw}, channel={channel_raw}, channel_url={channel_url_raw}")
 								    logger.warning(f"[REQUEST] ❌ Processed: channel_id={channel_id}, channel={channel}, channel_url={channel_url}")
 								    received_params = {
 								        'channel_id': channel_id_raw,
 								        'channel': channel_raw,
 								        'channel_url': channel_url_raw,
 								        'decoded_channel_url': channel_url
 								    }
 								    # Feed readers get a more explanatory payload, including an example URL.
 								    if is_rss_reader:
 								        logger.warning(f"[RSS_READER] Channel ID bulunamadı - URL: {channel_url or channel or channel_id}")
 								        return jsonify({
 								            'error': error_msg,
 								            'message': 'RSS okuyucunuzdan feed eklerken lütfen geçerli bir YouTube kanal URL\'si kullanın',
 								            'received_params': received_params,
 								            'example_url': f'{request.url_root}?channel_url=https://www.youtube.com/@username&api_key=YOUR_API_KEY&format=Atom',
 								            'usage': {
 								                'channel_id': 'UC... (YouTube Channel ID, 24 karakter)',
 								                'channel': '@username veya username',
 								                'channel_url': 'https://www.youtube.com/@username veya https://www.youtube.com/channel/UC...',
 								                'format': 'Atom veya Rss (varsayılan: Atom)',
 								                'max_items': 'Maksimum transcript sayısı (varsayılan: 10, maksimum: 100)',
 								                'api_key': 'API key query parametresi olarak eklenmelidir (RSS okuyucular header gönderemez)'
 								            }
 								        }), 400
 								    return jsonify({
 								        'error': error_msg,
 								        'received_params': received_params,
 								        'message': 'YouTube Channel ID UC ile başlayan 24 karakter olmalı (sadece alfanumerik ve alt çizgi)',
 								        'usage': {
 								            'channel_id': 'UC... (YouTube Channel ID, 24 karakter)',
 								            'channel': '@username veya username',
 								            'channel_url': 'https://www.youtube.com/@username veya https://www.youtube.com/channel/UC...',
 								            'format': 'Atom veya Rss (varsayılan: Atom)',
 								            'max_items': 'Maksimum transcript sayısı (varsayılan: 10, maksimum: 100, 20\'şer batch\'ler halinde işlenir)'
 								        }
 								    }), 400


 								@app.route('/', methods=['GET'])
 								@require_api_key  # API key required
 								@validate_input  # Input validation
 								def generate_feed():
 								    """
 								    Build a transcript feed for a YouTube channel (RSS-Bridge style URL template).

 								    Query parameters:
 								    - channel_id / channel / channel_url: identify the channel (any one of them)
 								    - format: 'Rss' selects RSS; any other value yields Atom (default: Atom)
 								    - max_items: number of transcripts, clamped to 1..100 (default: 10)

 								    Examples:
 								    - /?channel_id=UC9h8BDcXwkhZtnqoQJ7PggA&format=Atom
 								    - /?channel=@tavakfi&format=Atom
 								    - /?channel_url=https://www.youtube.com/@tavakfi&format=Atom

 								    Returns the feed XML on success, or a JSON error body with status
 								    400 (channel could not be resolved), 404 (no processed videos yet)
 								    or 500 (unexpected failure while processing or rendering).
 								    """
 								    # User-Agent sniffing, used only to tailor logs and error payloads for feed readers.
 								    user_agent = request.headers.get('User-Agent', '')
 								    is_rss_reader = any(keyword in user_agent.lower() for keyword in [
 								        'rss', 'feed', 'reader', 'aggregator', 'feedly', 'newsblur',
 								        'inoreader', 'theoldreader', 'netnewswire', 'reeder'
 								    ])

 								    # Read the query parameters and undo any residual percent-encoding.
 								    channel_id_raw = request.args.get('channel_id')
 								    channel_raw = request.args.get('channel')  # @username or username
 								    channel_url_raw = request.args.get('channel_url')

 								    channel_id = _feed_decode_param(channel_id_raw)
 								    channel = _feed_decode_param(channel_raw)
 								    if channel is not None:
 								        # Normalise the handle: trim whitespace and drop the leading '@'.
 								        channel = channel.strip().lstrip('@')
 								    channel_url = _feed_decode_channel_url(channel_url_raw)

 								    format_type = request.args.get('format', 'Atom').lower()  # Atom or Rss
 								    max_items = _feed_parse_max_items(request.args.get('max_items', 10))

 								    # Request logging. The API key may travel in the query string, so it is
 								    # masked before the URL and query string are written to the log.
 								    logger.info(f"[REQUEST] Tarayıcı isteği - Raw params: channel_id={channel_id_raw}, channel={channel_raw}, channel_url={channel_url_raw[:100] if channel_url_raw else None}")
 								    logger.info(f"[REQUEST] Processed params: channel_id={channel_id}, channel={channel}, channel_url={channel_url[:100] if channel_url else None}")
 								    logger.info(f"[REQUEST] Full URL: {_feed_redact_api_key(request.url)}")
 								    # errors='replace' keeps a malformed query string from failing the request just for a log line.
 								    logger.info(f"[REQUEST] Query string: {_feed_redact_api_key(request.query_string.decode('utf-8', errors='replace')) if request.query_string else None}")
 								    if is_rss_reader:
 								        logger.info(f"[RSS_READER] RSS okuyucu tespit edildi: {user_agent[:100]}")

 								    # Resolve whichever identifier was supplied; a failure is reported as HTTP 400 below.
 								    try:
 								        normalized_channel_id = normalize_channel_id(
 								            channel_id=channel_id,
 								            channel=channel,
 								            channel_url=channel_url
 								        )
 								        logger.info(f"[REQUEST] Normalized channel_id: {normalized_channel_id}")
 								    except Exception as e:
 								        logger.error(f"[REQUEST] ❌ Channel ID normalize hatası: {type(e).__name__} - {str(e)}")
 								        normalized_channel_id = None

 								    if not normalized_channel_id:
 								        return _feed_channel_error_response(
 								            is_rss_reader,
 								            channel_id_raw, channel_raw, channel_url_raw,
 								            channel_id, channel, channel_url
 								        )

 								    try:
 								        # Process the channel.
 								        result = process_channel(normalized_channel_id, max_items=max_items)
 								        if not result['videos']:
 								            return jsonify({
 								                'error': 'Henüz işlenmiş video yok',
 								                'channel_id': normalized_channel_id,
 								                'message': 'Transcript\'ler arka planda işleniyor. Lütfen birkaç dakika sonra tekrar deneyin.',
 								                'note': 'YouTube IP blocking nedeniyle transcript çıkarımı yavaş olabilir. İlk istekte birkaç dakika bekleyin.'
 								            }), 404

 								        # Build the feed.
 								        channel_info = {
 								            'id': normalized_channel_id,
 								            'title': f"YouTube Transcript Feed - {normalized_channel_id}",
 								            'link': f"https://www.youtube.com/channel/{normalized_channel_id}",
 								            'description': f'Full-text transcript RSS feed for channel {normalized_channel_id}',
 								            'language': 'en'
 								        }
 								        generator = RSSGenerator(channel_info)
 								        for video in result['videos']:
 								            generator.add_video_entry(video)

 								        response_headers = {}
 								        if hasattr(g, 'rate_limit_remaining'):
 								            response_headers['X-RateLimit-Remaining'] = str(g.rate_limit_remaining)

 								        # Anything other than 'rss' falls back to Atom.
 								        if format_type == 'rss':
 								            feed_body = generator.generate_rss_string()
 								            feed_mimetype = 'application/rss+xml'
 								        else:
 								            feed_body = generator.generate_atom_string()
 								            feed_mimetype = 'application/atom+xml'
 								        response_headers['Content-Type'] = f'{feed_mimetype}; charset=utf-8'
 								        return Response(
 								            feed_body,
 								            mimetype=feed_mimetype,
 								            headers=response_headers
 								        )
 								    except Exception as e:
 								        # Record the traceback; previously the failure was only visible to the client.
 								        logger.exception(f"[REQUEST] ❌ Feed generation failed for channel {normalized_channel_id}")
 								        return jsonify({
 								            'error': str(e),
 								            'channel_id': normalized_channel_id
 								        }), 500
 								@app.route('/health', methods=['GET'])
 								@rate_limit(limit_per_minute=120)  # higher limit for health checks
 								def health():
 								    """Health check: respond with a fixed JSON status and the service name."""
 								    status_payload = {
 								        'status': 'ok',
 								        'service': 'YouTube Transcript RSS Feed',
 								    }
 								    return jsonify(status_payload)
 								@app.route('/info', methods=['GET'])
 								@require_api_key  # API key required
 								def info():
 								    """Return a JSON description of the service: version, endpoints, parameters and example URLs."""
 								    # Endpoint path -> short description.
 								    endpoint_map = {
 								        '/': 'RSS Feed Generator',
 								        '/health': 'Health Check',
 								        '/info': 'API Info'
 								    }
 								    # Query parameter -> human-readable usage hint.
 								    parameter_help = {
 								        'channel_id': 'UC... (YouTube Channel ID)',
 								        'channel': '@username veya username',
 								        'channel_url': 'Full YouTube channel URL',
 								        'format': 'Atom veya Rss (varsayılan: Atom)',
 								        'max_items': 'Her istekte işlenecek maksimum transcript sayısı (varsayılan: 10, maksimum: 100, 20\'şer batch\'ler halinde işlenir)'
 								    }
 								    sample_urls = [
 								        '/?channel_id=UC9h8BDcXwkhZtnqoQJ7PggA&format=Atom',
 								        '/?channel=@tavakfi&format=Rss',
 								        '/?channel_url=https://www.youtube.com/@tavakfi&format=Atom&max_items=50'
 								    ]
 								    description = {
 								        'service': 'YouTube Transcript RSS Feed Generator',
 								        'version': '1.0.0',
 								        'endpoints': endpoint_map,
 								        'usage': parameter_help,
 								        'examples': sample_urls
 								    }
 								    return jsonify(description)
 								if __name__ == '__main__':
 								    # The server binds to all interfaces, so the interactive debugger must
 								    # not be on by default: it allows arbitrary code execution for anyone
 								    # who can reach the port. Opt in explicitly with FLASK_DEBUG=1.
 								    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
 								    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)