From fd981f45e95c193dc82dc7a2629a61adbb05103d Mon Sep 17 00:00:00 2001
From: thezero <io@thezero.org>
Date: Sun, 4 Jul 2021 16:00:07 +0200
Subject: [PATCH] better format detection

---
 Dockerfile       |   4 +-
 src/main.py      |   2 +
 src/vid_utils.py | 102 +++++++++++++++++++++--------------------------
 3 files changed, 50 insertions(+), 58 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index c5c7db0..68c27aa 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,8 @@
 FROM python:3.8-slim-buster
 WORKDIR /bot
 COPY requirements.txt .
+RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg \
+	&& rm -rf /var/lib/apt/lists/*
+RUN pip3 install -U youtube-dl
 RUN pip3 install -r requirements.txt
-RUN pip3 install --upgrade youtube-dl
 COPY src src
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
index 41fe769..1a2ec43 100644
--- a/src/main.py
+++ b/src/main.py
@@ -17,6 +17,8 @@ def get_format(update, context):
         video = Video(link=update.message.text, init_keyboard=True)
     except BadLink as e:
         update.message.reply_text("Bad link: {}".format(e))
+    except Exception as e:
+        update.message.reply_text("Error: {}".format(e))
     else:
         reply_markup = InlineKeyboardMarkup(video.keyboard)
         update.message.reply_text('Choose format:', reply_markup=reply_markup)
diff --git a/src/vid_utils.py b/src/vid_utils.py
index 0a86fe7..add7548 100644
--- a/src/vid_utils.py
+++ b/src/vid_utils.py
@@ -1,5 +1,7 @@
 import os
 import logging
+from json import loads
+from json.decoder import JSONDecodeError
 from glob import glob, escape
 from subprocess import Popen, PIPE
 from contextlib import contextmanager
@@ -17,7 +19,7 @@ class BadLink(Exception):
 
 class Video:
     def __init__(self, link=None, vid=None, init_keyboard=False):
-        self.db = VidDatabase(os.path.join(os.environ['CONF_FOLDER'], "viddb.sqlite3"))
+        self.db = VidDatabase(os.path.join(os.environ['CONF_FOLDER'], 'viddb.sqlite3'))
         if not self.db.is_valid:
             # Database file not present
             # Create a new database
@@ -28,64 +30,76 @@ class Video:
             self.file_name = None
         elif vid is not None and link is None:
             self.link, self.code = self.db.select_vid(vid)
+            self.code, self.audio_only = self.code.split('|')
         else:
             raise Exception('what is going on?')
 
         if init_keyboard:
-            self.formats = self.get_formats()
-            self.keyboard = self.generate_keyboard()
+            try:
+                self.formats = self.get_formats()
+                self.keyboard = self.generate_keyboard()
+            except Exception:
+                raise
 
     def get_formats(self):
+        ''' Return the dict of formats youtube-dl reports for self.link (filled in by add_format); raise BadLink if the link cannot be handled '''
         formats = {}
 
-        p = Popen(["youtube-dl", "-F", self.link], stdout=PIPE, stderr=PIPE).communicate()
-        it = iter(str(p[0], 'utf-8').split('\n'))  # iterator of output lines
+        # '--' ends option parsing, so a link starting with '-' is never read as a youtube-dl flag
+        p = Popen(['youtube-dl', '-J', '--', self.link], stdout=PIPE, stderr=PIPE).communicate()
+        if b'ERROR' in p[1] or not p[0].strip():  # failure on stderr, or no JSON to parse
+            raise BadLink('video URL not supported')
 
-        try:
-            while "code  extension" not in next(it):
-                pass  # Remove garbage lines
-        except StopIteration:
-            raise BadLink("youtube-dl couldn't download the link you provided")  # Isn't a valid youtube link
+        video_info = loads(str(p[0], 'utf-8'))
 
-        while True:
-            try:
-                line = next(it)
-                if not line:
-                    raise StopIteration  # Usually the last line is empty
-                if "video only" in line:
-                    continue  # I don't need video without audio
-            except StopIteration:
-                break
-            else:
-                format_code, extension, resolution, *_ = line.strip().split()
-                key = '{},{}'.format(extension, resolution)
-                index = self.db.insert_vid(self.link, format_code)
-                formats[key] = index
-        logger.info("Fromats: {}".format(formats))
+        if video_info.get('_type', None) == 'playlist':
+            video_info = video_info.get('entries')[0]
+        if video_info.get('formats') is not None:
+            for vid in video_info.get('formats'):
+                self.add_format(formats, vid)
+        else:
+            self.add_format(formats, video_info)
+        logger.info('Formats: {}'.format(formats))
         return formats
 
+    def add_format(self, formats, vid):
+        ''' Store one youtube-dl format dict in the DB and map its 'ext,name' label to the value insert_vid returns '''
+        name = vid.get('format') or ''  # 'format' may be absent; '' keeps the `in` test below from raising TypeError
+        key = '{},{}'.format(vid.get('ext'), name)
+        # the audio-only flag is stored next to the format id; __init__ splits the pair on '|'
+        code = '{}|{}'.format(vid.get('format_id'), 'audio only' in name)
+        index = self.db.insert_vid(self.link, code)
+        formats[key] = index
+
     def generate_keyboard(self):
-        """ Generate a list of InlineKeyboardButton of resolutions """
+        ''' Build the keyboard rows: one InlineKeyboardButton per entry of self.formats, labelled with the format key and carrying its value as callback data '''
         kb = []
 
         for key in self.formats.keys():
-            cb = "{}".format(self.formats[key])
+            cb = '{}'.format(self.formats[key])
             kb.append([InlineKeyboardButton(key, callback_data=cb)])
         return kb
 
     def download(self):
-        logger.info("Downloading {}".format(self.link))
-        p = Popen(["youtube-dl", "-o", "/bot/out/%(title)s-%(id)s.%(ext)s", "-f", self.code, self.link], stdout=PIPE, stderr=PIPE).communicate()
+        ''' Download self.link in format self.code into /bot/out and store the resulting file path in self.file_name '''
+        logger.info('Downloading {}'.format(self.link))
+        code = (self.code + '+bestaudio') if self.audio_only in [False, "False"] else self.code  # self.code stays untouched so a repeated call does not append twice
+        # '--' ends option parsing, so a link starting with '-' is never read as a youtube-dl flag
+        cmd = ['youtube-dl', '-o', '/bot/out/%(title)s-%(id)s.%(ext)s', '-f', code, '--', self.link]
+        logger.info(cmd)
+        p = Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()
 
         for line in str(p[0], 'utf-8').split('\n'):
             logger.info(line)
-            if "[download] Destination:" in line:
+            if '[download] Destination:' in line:
                 self.file_name = line[24:]  # name of the file
+            if '[ffmpeg] Merging formats into' in line:
+                self.file_name = line[31:-1]  # name of the file
 
     def check_dimension(self):
         try:
             if os.path.getsize(self.file_name) > 50 * 1024 * 1023:
-                Popen(["split", "-b", "49M", self.file_name, self.file_name])
+                Popen(['split', '-b', '49M', self.file_name, self.file_name])
                 os.remove(self.file_name)
             return glob(escape(self.file_name) + '*')
         except AttributeError as e:
@@ -98,29 +112,3 @@ class Video:
         yield files
         for f in files:  # removing old files
             os.remove(f)
-
-
-
-
-
-#__________________________OLD STUFFS, TOUCH CAREFULLY__________________________
-
-# this is the soft-split version, require avconv, but the audio isn't synchronized, avconv's problems :(
-'''
-def get_duration(filepath): # get duration in seconds
-    cmd = "avconv -i %s" % filepath
-    p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
-    di = p.communicate()
-    for line in di:
-        if line.rfind(b"Duration") > 0:
-            duration = str(re.findall(b"Duration: (\d+:\d+:[\d.]+)", line)[0])
-    return 3600 * int(duration[2: 4]) + 60 * int(duration[5: 7]) + int(duration[8: 10])
-
-def check_dimension(f): # if f is bigger than 50MB split it in subvideos
-    if os.path.getsize(f) > 50 * 1024 * 1023:
-        duration = get_duration(f)
-        for i in range(0, duration, 180):
-            start = strftime("%H:%M:%S", strptime('{0} {1} {2}'.format(i // 3600, (i // 60) % 60, i % 60), "%H %M %S")) # TODO this is  not pythonic code!
-            os.system("""avconv -i '{0}' -vcodec copy -acodec copy -ss {1} -t {2} 'part_{3}.mp4'""".format(f, start, 180, (i // 180) % 180))
-        os.remove(f) # delete original file
-'''