""" Flask web server - RSS-Bridge benzeri URL template sistemi """ from flask import Flask, request, Response, jsonify from typing import Optional import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent)) from src.database import Database from src.video_fetcher import fetch_videos_from_rss_bridge, get_channel_id_from_handle, extract_video_id from src.transcript_extractor import TranscriptExtractor from src.transcript_cleaner import TranscriptCleaner from src.rss_generator import RSSGenerator app = Flask(__name__) # Global instances (lazy loading) db = None extractor = None cleaner = None def get_db(): """Database instance'ı al (singleton)""" global db if db is None: db = Database() db.init_database() return db def get_extractor(): """Transcript extractor instance'ı al""" global extractor if extractor is None: extractor = TranscriptExtractor() return extractor def get_cleaner(): """Transcript cleaner instance'ı al""" global cleaner if cleaner is None: cleaner = TranscriptCleaner() return cleaner def normalize_channel_id(channel_id: Optional[str] = None, channel: Optional[str] = None, channel_url: Optional[str] = None) -> Optional[str]: """ Farklı formatlardan channel ID'yi normalize et Args: channel_id: Direkt Channel ID (UC...) channel: Channel handle (@username) veya username channel_url: Full YouTube channel URL Returns: Normalize edilmiş Channel ID veya None """ # Direkt Channel ID varsa if channel_id: if channel_id.startswith('UC') and len(channel_id) == 24: return channel_id # Eğer URL formatında ise parse et if 'youtube.com/channel/' in channel_id: parts = channel_id.split('/channel/') if len(parts) > 1: return parts[-1].split('?')[0].split('/')[0] # Channel handle (@username) if channel: if not channel.startswith('@'): channel = f"@{channel}" handle_url = f"https://www.youtube.com/{channel}" return get_channel_id_from_handle(handle_url) # Channel URL if channel_url: # Handle URL if '/@' in channel_url: return get_channel_id_from_handle(channel_url) # Channel ID URL elif '/channel/' in channel_url: parts = channel_url.split('/channel/') if len(parts) > 1: return parts[-1].split('?')[0].split('/')[0] return None def process_channel(channel_id: str, max_items: int = 50) -> dict: """ Kanal için transcript feed'i oluştur Returns: RSS feed string ve metadata """ db = get_db() extractor = get_extractor() cleaner = get_cleaner() # RSS-Bridge'den videoları çek try: videos = fetch_videos_from_rss_bridge( base_url="https://rss-bridge.org/bridge01", channel_id=channel_id, format="Atom", max_items=max_items ) except Exception as e: raise Exception(f"RSS-Bridge hatası: {e}") # Yeni videoları veritabanına ekle for video in videos: video['channel_id'] = channel_id if not db.is_video_processed(video['video_id']): db.add_video(video) # Bekleyen videoları işle (ilk 20) pending_videos = db.get_pending_videos()[:20] for video in pending_videos: if video['channel_id'] != channel_id: continue try: # Transcript çıkar transcript = extractor.fetch_transcript( video['video_id'], languages=['tr', 'en'] ) if transcript: # Transcript temizle raw, clean = cleaner.clean_transcript(transcript, sentences_per_paragraph=3) # Veritabanına kaydet db.update_video_transcript( video['video_id'], raw, clean, status=1, language='tr' ) except Exception as e: print(f"Transcript çıkarım hatası {video['video_id']}: {e}") db.mark_video_failed(video['video_id'], str(e)) # İşlenmiş videoları getir processed_videos = db.get_processed_videos( limit=max_items, channel_id=channel_id ) return { 'videos': processed_videos, 'channel_id': channel_id, 'count': len(processed_videos) } @app.route('/', methods=['GET']) def generate_feed(): """ RSS-Bridge benzeri URL template: Örnekler: - /?channel_id=UC9h8BDcXwkhZtnqoQJ7PggA&format=Atom - /?channel=@tavakfi&format=Atom - /?channel_url=https://www.youtube.com/@tavakfi&format=Atom """ # Query parametrelerini al channel_id = request.args.get('channel_id') channel = request.args.get('channel') # @username veya username channel_url = request.args.get('channel_url') format_type = request.args.get('format', 'Atom').lower() # Atom veya Rss max_items = int(request.args.get('max_items', 50)) # Channel ID'yi normalize et normalized_channel_id = normalize_channel_id( channel_id=channel_id, channel=channel, channel_url=channel_url ) if not normalized_channel_id: return jsonify({ 'error': 'Channel ID bulunamadı', 'usage': { 'channel_id': 'UC... (YouTube Channel ID)', 'channel': '@username veya username', 'channel_url': 'https://www.youtube.com/@username veya https://www.youtube.com/channel/UC...', 'format': 'Atom veya Rss (varsayılan: Atom)', 'max_items': 'Maksimum video sayısı (varsayılan: 50)' } }), 400 try: # Kanalı işle result = process_channel(normalized_channel_id, max_items=max_items) if not result['videos']: return jsonify({ 'error': 'Henüz işlenmiş video yok', 'channel_id': normalized_channel_id, 'message': 'Lütfen birkaç dakika sonra tekrar deneyin' }), 404 # RSS feed oluştur channel_info = { 'id': normalized_channel_id, 'title': f"YouTube Transcript Feed - {normalized_channel_id}", 'link': f"https://www.youtube.com/channel/{normalized_channel_id}", 'description': f'Full-text transcript RSS feed for channel {normalized_channel_id}', 'language': 'en' } generator = RSSGenerator(channel_info) for video in result['videos']: generator.add_video_entry(video) # Format'a göre döndür if format_type == 'rss': rss_content = generator.generate_rss_string() return Response( rss_content, mimetype='application/rss+xml', headers={'Content-Type': 'application/rss+xml; charset=utf-8'} ) else: # Atom # Feedgen Atom desteği atom_content = generator.generate_atom_string() return Response( atom_content, mimetype='application/atom+xml', headers={'Content-Type': 'application/atom+xml; charset=utf-8'} ) except Exception as e: return jsonify({ 'error': str(e), 'channel_id': normalized_channel_id }), 500 @app.route('/health', methods=['GET']) def health(): """Health check endpoint""" return jsonify({'status': 'ok', 'service': 'YouTube Transcript RSS Feed'}) @app.route('/info', methods=['GET']) def info(): """API bilgileri""" return jsonify({ 'service': 'YouTube Transcript RSS Feed Generator', 'version': '1.0.0', 'endpoints': { '/': 'RSS Feed Generator', '/health': 'Health Check', '/info': 'API Info' }, 'usage': { 'channel_id': 'UC... (YouTube Channel ID)', 'channel': '@username veya username', 'channel_url': 'Full YouTube channel URL', 'format': 'Atom veya Rss (varsayılan: Atom)', 'max_items': 'Maksimum video sayısı (varsayılan: 50)' }, 'examples': [ '/?channel_id=UC9h8BDcXwkhZtnqoQJ7PggA&format=Atom', '/?channel=@tavakfi&format=Rss', '/?channel_url=https://www.youtube.com/@tavakfi&format=Atom&max_items=100' ] }) if __name__ == '__main__': app.run(host='0.0.0.0', port=5000, debug=True)