Browse Source

better format detection

thezero 2 years ago
parent
commit
fd981f45e9
3 changed files with 50 additions and 58 deletions
  1. +3 -1
      Dockerfile
  2. +2 -0
      src/main.py
  3. +45 -57
      src/vid_utils.py

+ 3 - 1
Dockerfile

@@ -1,6 +1,8 @@
 FROM python:3.8-slim-buster
 FROM python:3.8-slim-buster
 WORKDIR /bot
 WORKDIR /bot
 COPY requirements.txt .
 COPY requirements.txt .
+RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg; \
+	rm -rf /var/lib/apt/lists/*;
+RUN pip3 install -U youtube-dl
 RUN pip3 install -r requirements.txt
 RUN pip3 install -r requirements.txt
-RUN pip3 install --upgrade youtube-dl
 COPY src src
 COPY src src

+ 2 - 0
src/main.py

@@ -17,6 +17,8 @@ def get_format(update, context):
         video = Video(link=update.message.text, init_keyboard=True)
         video = Video(link=update.message.text, init_keyboard=True)
     except BadLink as e:
     except BadLink as e:
         update.message.reply_text("Bad link: {}".format(e))
         update.message.reply_text("Bad link: {}".format(e))
+    except Exception as e:
+        update.message.reply_text("Error: {}".format(e))
     else:
     else:
         reply_markup = InlineKeyboardMarkup(video.keyboard)
         reply_markup = InlineKeyboardMarkup(video.keyboard)
         update.message.reply_text('Choose format:', reply_markup=reply_markup)
         update.message.reply_text('Choose format:', reply_markup=reply_markup)

+ 45 - 57
src/vid_utils.py

@@ -1,5 +1,7 @@
 import os
 import os
 import logging
 import logging
+from json import loads
+from json.decoder import JSONDecodeError
 from glob import glob, escape
 from glob import glob, escape
 from subprocess import Popen, PIPE
 from subprocess import Popen, PIPE
 from contextlib import contextmanager
 from contextlib import contextmanager
@@ -17,7 +19,7 @@ class BadLink(Exception):
 
 
 class Video:
 class Video:
     def __init__(self, link=None, vid=None, init_keyboard=False):
     def __init__(self, link=None, vid=None, init_keyboard=False):
-        self.db = VidDatabase(os.path.join(os.environ['CONF_FOLDER'], "viddb.sqlite3"))
+        self.db = VidDatabase(os.path.join(os.environ['CONF_FOLDER'], 'viddb.sqlite3'))
         if not self.db.is_valid:
         if not self.db.is_valid:
             # Database file not present
             # Database file not present
             # Create a new database
             # Create a new database
@@ -28,64 +30,76 @@ class Video:
             self.file_name = None
             self.file_name = None
         elif vid is not None and link is None:
         elif vid is not None and link is None:
             self.link, self.code = self.db.select_vid(vid)
             self.link, self.code = self.db.select_vid(vid)
+            self.code, self.audio_only = self.code.split('|')
         else:
         else:
             raise Exception('what is going on?')
             raise Exception('what is going on?')
 
 
         if init_keyboard:
         if init_keyboard:
-            self.formats = self.get_formats()
-            self.keyboard = self.generate_keyboard()
+            try:
+                self.formats = self.get_formats()
+                self.keyboard = self.generate_keyboard()
+            except Exception:
+                raise
 
 
     def get_formats(self):
     def get_formats(self):
         formats = {}
         formats = {}
 
 
-        p = Popen(["youtube-dl", "-F", self.link], stdout=PIPE, stderr=PIPE).communicate()
-        it = iter(str(p[0], 'utf-8').split('\n'))  # iterator of output lines
+        p = Popen(['youtube-dl', '-J', self.link], stdout=PIPE, stderr=PIPE).communicate()
+        if b'ERROR' in p[1]:
+            raise Exception('video URL not supported')
 
 
-        try:
-            while "code  extension" not in next(it):
-                pass  # Remove garbage lines
-        except StopIteration:
-            raise BadLink("youtube-dl couldn't download the link you provided")  # Isn't a valid youtube link
+        video_info = loads(str(p[0], 'utf-8'))
 
 
-        while True:
-            try:
-                line = next(it)
-                if not line:
-                    raise StopIteration  # Usually the last line is empty
-                if "video only" in line:
-                    continue  # I don't need video without audio
-            except StopIteration:
-                break
-            else:
-                format_code, extension, resolution, *_ = line.strip().split()
-                key = '{},{}'.format(extension, resolution)
-                index = self.db.insert_vid(self.link, format_code)
-                formats[key] = index
-        logger.info("Fromats: {}".format(formats))
+        if video_info.get('_type', None) == 'playlist':
+            video_info = video_info.get('entries')[0]
+
+        if video_info.get('formats') is not None:
+            for vid in video_info.get('formats'):
+                self.add_format(formats, vid)
+        else:
+            self.add_format(formats, video_info)
+
+        logger.info('Formats: {}'.format(formats))
         return formats
         return formats
 
 
+    def add_format(self, formats, vid):
+        format_code = vid.get('format_id')
+        extension = vid.get('ext')
+        name = vid.get('format')
+        key = '{},{}'.format(extension, name)
+        code = '{}|{}'.format(format_code, 'audio only' in name)
+        index = self.db.insert_vid(self.link, code)
+        formats[key] = index
+
     def generate_keyboard(self):
     def generate_keyboard(self):
-        """ Generate a list of InlineKeyboardButton of resolutions """
+        ''' Generate a list of InlineKeyboardButton of resolutions '''
         kb = []
         kb = []
 
 
         for key in self.formats.keys():
         for key in self.formats.keys():
-            cb = "{}".format(self.formats[key])
+            cb = '{}'.format(self.formats[key])
             kb.append([InlineKeyboardButton(key, callback_data=cb)])
             kb.append([InlineKeyboardButton(key, callback_data=cb)])
         return kb
         return kb
 
 
     def download(self):
     def download(self):
-        logger.info("Downloading {}".format(self.link))
-        p = Popen(["youtube-dl", "-o", "/bot/out/%(title)s-%(id)s.%(ext)s", "-f", self.code, self.link], stdout=PIPE, stderr=PIPE).communicate()
+        logger.info('Downloading {}'.format(self.link))
+        cmd = ['youtube-dl', '-o', '/bot/out/%(title)s-%(id)s.%(ext)s']
+        if self.audio_only in [False, "False"]:
+            self.code = self.code + '+bestaudio'
+        cmd.extend(['-f', self.code, self.link])
+        logger.info(cmd)
+        p = Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()
 
 
         for line in str(p[0], 'utf-8').split('\n'):
         for line in str(p[0], 'utf-8').split('\n'):
             logger.info(line)
             logger.info(line)
-            if "[download] Destination:" in line:
+            if '[download] Destination:' in line:
                 self.file_name = line[24:]  # name of the file
                 self.file_name = line[24:]  # name of the file
+            if '[ffmpeg] Merging formats into' in line:
+                self.file_name = line[31:-1]  # name of the file
 
 
     def check_dimension(self):
     def check_dimension(self):
         try:
         try:
             if os.path.getsize(self.file_name) > 50 * 1024 * 1023:
             if os.path.getsize(self.file_name) > 50 * 1024 * 1023:
-                Popen(["split", "-b", "49M", self.file_name, self.file_name])
+                Popen(['split', '-b', '49M', self.file_name, self.file_name])
                 os.remove(self.file_name)
                 os.remove(self.file_name)
             return glob(escape(self.file_name) + '*')
             return glob(escape(self.file_name) + '*')
         except AttributeError as e:
         except AttributeError as e:
@@ -98,29 +112,3 @@ class Video:
         yield files
         yield files
         for f in files:  # removing old files
         for f in files:  # removing old files
             os.remove(f)
             os.remove(f)
-
-
-
-
-
-#__________________________OLD STUFFS, TOUCH CAREFULLY__________________________
-
-# this is the soft-split version, require avconv, but the audio isn't synchronized, avconv's problems :(
-'''
-def get_duration(filepath): # get duration in seconds
-    cmd = "avconv -i %s" % filepath
-    p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
-    di = p.communicate()
-    for line in di:
-        if line.rfind(b"Duration") > 0:
-            duration = str(re.findall(b"Duration: (\d+:\d+:[\d.]+)", line)[0])
-    return 3600 * int(duration[2: 4]) + 60 * int(duration[5: 7]) + int(duration[8: 10])
-
-def check_dimension(f): # if f is bigger than 50MB split it in subvideos
-    if os.path.getsize(f) > 50 * 1024 * 1023:
-        duration = get_duration(f)
-        for i in range(0, duration, 180):
-            start = strftime("%H:%M:%S", strptime('{0} {1} {2}'.format(i // 3600, (i // 60) % 60, i % 60), "%H %M %S")) # TODO this is  not pythonic code!
-            os.system("""avconv -i '{0}' -vcodec copy -acodec copy -ss {1} -t {2} 'part_{3}.mp4'""".format(f, start, 180, (i // 180) % 180))
-        os.remove(f) # delete original file
-'''