Videogrep

The idea of videogrep is simple: apply the text-filter logic of GNU/Linux's grep command to an SRT subtitle file to do on-the-fly, text-based video editing!

A first version is documented here.

The version documented here uses MLT and its melt command-line tool (Debian users: apt-get install melt).

usage / examples[edit]

# NB the 15 is the framerate of the input movie -- it's important that it's right
# (otherwise the timing will be off)
#

# Search for the word woman and convert to a melt command (display on screen)
./srtgrep rearwindow.srt "\bwoman\b" | ./srt2melt rearwindow.avi 15

# Same thing but DO IT (pipe to bash)
./srtgrep rearwindow.srt "\bwoman\b" | ./srt2melt rearwindow.avi 15 | bash

# Save output to file "woman.mp4"
./srtgrep rearwindow.srt "\bwoman\b" | ./srt2melt rearwindow.avi 15 woman.mp4 | bash

srtgrep[edit]

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import re, os, sys, codecs
from srt import srtsplit

USAGE = "usage: srtgrep foo.srt \"search regex\""

# Validate the command line: argv[1] is the path of the SRT file and argv[2]
# the regular expression to search for.  Diagnostics are reported through
# sys.exit(<string>), which writes the text to stderr and exits with a
# non-zero status: stdout is normally piped into srt2melt / bash, where a
# usage message would be mistaken for data.
try:
    srtpath = sys.argv[1]
    if not os.path.exists(srtpath):
        sys.exit('SRT not found "%s"\n%s' % (srtpath, USAGE))
    searchterm = sys.argv[2]
    if not searchterm:
        sys.exit('Missing searchterm\n%s' % USAGE)
    searchpat = re.compile(searchterm)
except IndexError:
    # Fewer than two arguments were given.
    sys.exit(USAGE)
except re.error as e:
    # The search term is not a valid regular expression.
    sys.exit('Invalid search regex "%s": %s\n%s' % (searchterm, e, USAGE))
    
def unparse_title (tdata, tbody):
    """Write one subtitle to stdout in SRT-like form.

    The output is a "start --> end" header line, the stripped body text and
    a blank separator line.

    tdata -- header dict of the title; its "start" and "end" entries are used
    tbody -- text of the title (expected to be unicode text)

    The record is encoded as UTF-8 and written as bytes.  Under Python 2 a
    plain print of unicode text falls back to the ASCII codec when stdout is
    a pipe and raises UnicodeEncodeError on non-ASCII subtitles.
    """
    record = u"%s --> %s\n%s\n\n" % (
        tdata.get("start"), tdata.get("end"), tbody.strip())
    # Python 3 exposes the underlying byte stream as sys.stdout.buffer; on
    # Python 2 sys.stdout itself accepts byte strings.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    out.write(record.encode("utf-8"))
  
# Read the whole subtitle file.  "utf-8-sig" decodes UTF-8 and additionally
# drops a leading byte-order mark, which would otherwise end up in front of
# the first title number.  The with-block closes the file after reading.
with codecs.open(srtpath, "r", "utf-8-sig") as srtfile:
    text = srtfile.read()
titles = srtsplit(text)

# Print every title whose text matches the search pattern.
for (tdata, tbody) in titles:
    # tdata is None for any text that precedes the first timecode header;
    # such text has no timing information and cannot be output as a title.
    if tdata is not None and searchpat.search(tbody):
        unparse_title(tdata, tbody)

srt2melt[edit]

NB: MELT works in frames, whereas SRT timecodes are expressed in seconds, so the movie's framerate is needed to convert between the two. The framerate is passed as the second argument to this script and must match that of your movie. Use ffmpeg -i yourmovie to check the rate.

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import re, sys
from srt import srtsplit
import timecode

# Required arguments: the movie file and its framerate (frames per second).
# Usage errors are reported through sys.exit(<string>), which writes the
# text to stderr and exits with a non-zero status.  stdout is normally piped
# into bash, so diagnostics must not be printed there, and the script must
# not carry on with moviepath / framerate undefined.
_USAGE = "usage:\n    srt2melt movie.avi framerate [outputpath]"
try:
    moviepath = sys.argv[1]
    framerate = float(sys.argv[2])
except IndexError:
    sys.exit("missing parameter\n" + _USAGE)
except ValueError:
    # float() could not parse the framerate argument.
    sys.exit('framerate must be a number, got "%s"\n%s' % (sys.argv[2], _USAGE))

# Optional third argument: the file to render into.  None when it is not
# given, in which case no -consumer option is emitted further down.
outputpath = sys.argv[3] if len(sys.argv) > 3 else None

# Emit a melt command line on stdout: one clip per subtitle read from stdin,
# each cut from the movie at the subtitle's in/out frame numbers.
try:
    from shlex import quote as _shellquote   # Python 3.3+
except ImportError:
    from pipes import quote as _shellquote   # Python 2

print('melt \\')
titles = srtsplit(sys.stdin.read())

# The generated text is meant to be executed by a shell, so the paths are
# shell-quoted rather than pasted in verbatim.
_clip = _shellquote(moviepath)
for (tdata, tbody) in titles:
    # Skip text preceding the first timecode header (tdata is None) and
    # headers lacking a start or end time: no frame range can be computed.
    if tdata is None or not tdata.get('start') or not tdata.get('end'):
        continue
    start = timecode.timecode_tosecs(tdata.get('start'))
    end = timecode.timecode_tosecs(tdata.get('end'))
    # seconds * frames-per-second, truncated to a whole frame number
    print('%s in=%d out=%d \\' % (_clip, int(start*framerate), int(end*framerate)))

if outputpath:
    print("-consumer " + _shellquote("avformat:" + outputpath))

srt.py[edit]

#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re

# Matches the header of one SRT title: an optional title-number line followed
# by a "start --> end" timecode line.  The named groups are "titlenumber",
# "start" and "end"; the title number, the hours field, the fractional
# seconds and the whole end timecode are each optional.
# re.X allows the pattern to be laid out with insignificant whitespace;
# re.M makes ^ and $ match at line boundaries.
srttimecode_pattern = re.compile(
    r"""^
    (?# Timecode )
    (^

    ((?P<titlenumber>\d+)\r?\n)?

    (?P<start> ((\d\d):)? (\d\d): (\d\d) ([,.]\d{1,3})?)
    \s* --> \s*
    (?P<end> ((\d\d):)? (\d\d): (\d\d) ([,.]\d{1,3})?)?

    \s*)

    $""",
    re.X|re.M
)

def split (text, pat, removeBlankHead=True):
    """Cut text into (head, body) pairs at every match of pat.

    Each head is the groupdict() of a match and its body is the text that
    follows, up to the next match (or the end of text).  Text preceding the
    first match is paired with a head of None; that leading pair is left out
    when its body is the empty string and removeBlankHead is true.

    Returns the result of zip() over the heads and bodies.
    """
    matches = list(pat.finditer(text))
    heads = [None] + [m.groupdict() for m in matches]
    # body i runs from the end of the previous match to the start of match i;
    # the last body runs to the end of the text
    body_starts = [0] + [m.end() for m in matches]
    body_ends = [m.start() for m in matches] + [len(text)]
    bodies = [text[a:b] for (a, b) in zip(body_starts, body_ends)]
    skip = 1 if (removeBlankHead and bodies[0] == "") else 0
    return zip(heads[skip:], bodies[skip:])

def srtsplit (text, removeBlankHead=True):
    """Split SRT text into (header, body) pairs, one per title.

    text            -- contents of an SRT file
    removeBlankHead -- forwarded to split(): when true, an empty chunk of
                       text before the first timecode header is dropped

    Returns what split() returns for srttimecode_pattern: pairs of the
    header's groupdict ("titlenumber", "start", "end") and the title text.
    """
    # Forward the flag to split() so the parameter actually takes effect.
    return split(text, srttimecode_pattern, removeBlankHead)

if __name__ == "__main__":
    # Debugging aid: parse SRT text from stdin and pretty-print the result.
    import sys
    import pprint
    pprint.pprint(srtsplit(sys.stdin.read()))

timecode.py[edit]

# This file is part of Active Archives.
# Copyright 2006-2010 the Active Archives contributors (see AUTHORS)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# Also add information on how to contact you by electronic and paper mail.


import math, re

# Timecode of the form [H:]M:S[.f] or [H:]M:S[,f], each field being one or
# more digits.  Groups: 1 = hours (optional), 2 = minutes, 3 = seconds,
# 4 = fraction digits (optional, without the delimiter).
# Earlier form of the pattern, in which the hours field was mandatory:
# timecode_pat = re.compile(r"(\d+):(\d+):(\d+)(?:[.,](\d+))?")
timecode_pat = re.compile(r"(?:(\d+):)?(\d+):(\d+)(?:[.,](\d+))?")

def timecode_fromsecs(rawsecs, fract=True, alwaysfract = False, fractdelim = ',', alwayshours=False):
    """Format a number of seconds as a [HH:]MM:SS[,xxx] timecode string.

    rawsecs     -- time in seconds (int or float); negative values get no
                   special treatment
    fract       -- if true, keep millisecond precision and append the
                   three-digit fraction when the input has a fractional part
                   or alwaysfract is set; if false, round to whole seconds
    alwaysfract -- with fract, append the fraction even when it is zero
    fractdelim  -- delimiter placed between the seconds and the fraction
    alwayshours -- include the hours field even when it is zero

    Rounding is applied to the total before it is split into fields, so a
    value that rounds up carries into the next field: 59.67 without
    fraction gives "01:00" rather than "00:60", and 59.9996 with fraction
    gives "01:00,000" rather than "00:59,000".
    """
    if fract:
        # Work in whole milliseconds so that rounding can carry upwards.
        total_secs, millis = divmod(int(round(rawsecs * 1000)), 1000)
        show_fract = alwaysfract or rawsecs != math.floor(rawsecs)
    else:
        total_secs, millis = int(round(rawsecs)), 0
        show_fract = False

    mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(mins, 60)

    if hours or alwayshours:
        ret = "%02d:%02d:%02d" % (hours, mins, secs)
    else:
        ret = "%02d:%02d" % (mins, secs)
    if show_fract:
        ret += "%s%03d" % (fractdelim, millis)
    return ret

def timecode_tosecs(tcstr):
    """Convert the first timecode found in tcstr to a number of seconds.

    Uses timecode_pat, i.e. an optional hours field, minutes, seconds and an
    optional fraction.  Returns an int when the timecode has no fraction, a
    float when it has one, and None when tcstr contains no timecode.
    """
    found = timecode_pat.search(tcstr)
    if not found:
        return None
    hours, mins, secs, frac = found.groups()
    total = 60 * int(mins) + int(secs)
    if hours:
        total += 3600 * int(hours)
    if frac:
        # Join the fraction digits on textually, so that "5" means .5 and
        # "050" means .050, and let float() parse the combined number.
        total = float("%d.%s" % (total, frac))
    return total

def parse2secs (val):
    """Return val as a number of seconds.

    val is first tried as a plain number via float(); if float() raises
    ValueError it is handed to timecode_tosecs() instead.
    """
    try:
        secs = float(val)
    except ValueError:
        secs = timecode_tosecs(val)
    return secs
## to accept None
#    except TypeError:
#        return

if __name__ == "__main__":
    # Manual round-trip check: format each sample as a timecode and parse
    # it back, printing "input => timecode => seconds".
    def t(x):
        # first with the fraction kept, then rounded to whole seconds
        for options in ((True, False), (False,)):
            s = timecode_fromsecs(x, *options)
            print("%s => %s => %s" % (x, s, timecode_tosecs(s)))

    for sample in (0, 59.666666666666666, 60, 60.0, 1235/3.0, 10000.5):
        t(sample)

Videogrep

Contents

usage / examples[edit]

srtgrep[edit]

srt2melt[edit]

srt.py[edit]

timecode.py[edit]

Navigation menu

Views

Personal tools

Navigation

Search

Tools