Videogrep
From Osvidwiki
The idea of videogrep is simple: apply the text-filtering logic of GNU/Linux's grep command to an SRT subtitle file in order to do on-the-fly, text-based video editing!
A first version is documented here.
The version documented here uses MLT, and the melt command line tool (debian users: apt-get install melt)
usage / examples[edit]
# NB the 15 is the framerate of the input movie -- it's important that it's right
# (otherwise the timing will be off)
#
# Search for the word woman and convert to a melt command (display on screen)
./srtgrep rearwindow.srt "\bwoman\b" | ./srt2melt rearwindow.avi 15
# Same thing but DO IT (pipe to bash)
./srtgrep rearwindow.srt "\bwoman\b" | ./srt2melt rearwindow.avi 15 | bash
# Save output to file "woman.mp4"
./srtgrep rearwindow.srt "\bwoman\b" | ./srt2melt rearwindow.avi 15 woman.mp4 | bash
srtgrep[edit]
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re, os, sys, codecs
from srt import srtsplit
USAGE = "usage: srtgrep foo.srt \"search regex\""

# Validate the command line: argv[1] is the SRT path, argv[2] the search regex.
# Diagnostics are written to stderr and the exit status is non-zero: stdout is
# meant to be piped into srt2melt (and from there into bash), so it must only
# ever carry matching subtitle entries.
try:
    srtpath = sys.argv[1]
    if not os.path.exists(srtpath):
        raise AttributeError('SRT not found "%s"' % srtpath)
    searchterm = sys.argv[2]
    if not searchterm:
        raise AttributeError('Missing searchterm')
    searchpat = re.compile(searchterm)
except AttributeError as e:
    # One of the explicit validation failures raised above.
    sys.stderr.write("%s\n%s\n" % (e, USAGE))
    sys.exit(1)
except IndexError:
    # Fewer than two arguments were given.
    sys.stderr.write(USAGE + "\n")
    sys.exit(1)
except re.error as e:
    # The search term is not a valid regular expression.
    sys.stderr.write('Invalid search regex "%s": %s\n%s\n' % (searchterm, e, USAGE))
    sys.exit(1)
def unparse_title (tdata, tbody):
    """Write one subtitle entry to standard output in SRT-like form.

    Output is a "start --> end" line, the stripped body text and one blank
    line.

    tdata -- mapping with "start" and "end" timecode strings
    tbody -- subtitle text belonging to that timecode
    """
    header = "%s --> %s" % (tdata.get("start"), tdata.get("end"))
    for line in (header, tbody.strip(), ""):
        # On Python 2 (where str is bytes) decoded subtitle text is a unicode
        # object; writing it to a pipe would use the ASCII codec and fail on
        # the first non-ASCII character, so encode it explicitly as UTF-8.
        if str is bytes and not isinstance(line, str):
            line = line.encode("utf-8")
        sys.stdout.write(line + "\n")
# Read the whole subtitle file and close it again straight away.  The
# "utf-8-sig" codec decodes plain UTF-8 and additionally drops a leading
# byte-order mark, which would otherwise sit in front of the first entry.
with codecs.open(srtpath, "r", "utf-8-sig") as srtfile:
    text = srtfile.read()
titles = srtsplit(text)
for (tdata, tbody) in titles:
    # tdata is None for text that precedes the first timecode; such text has
    # no timing to print, so it is never reported as a match.
    if tdata is not None and searchpat.search(tbody):
        unparse_title(tdata, tbody)
srt2melt[edit]
NB: MELT works in frames, whereas SRT timecodes are expressed in seconds, so the movie's framerate is needed to convert between the two. The framerate is passed as the second argument to this script (see the usage examples above) and must match that of your movie. Use ffmpeg -i yourmovie to check the rate.
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re, sys
from srt import srtsplit
import timecode
# Required arguments: the movie path and its framerate (frames per second).
# On a bad command line the usage text goes to stderr and the script stops
# with a non-zero status: stdout is meant to be piped into bash, so nothing
# but the melt command may be written there.
try:
    moviepath = sys.argv[1]
    framerate = float(sys.argv[2])
except (IndexError, ValueError) as e:
    if isinstance(e, IndexError):
        problem = "missing parameter"
    else:
        problem = "framerate must be a number"
    sys.stderr.write(problem + "\n")
    sys.stderr.write("usage:\n")
    sys.stderr.write(" srt2melt movie.avi framerate [outputpath]\n")
    sys.exit(1)

# Optional third argument: the file the result is written to.
try:
    outputpath = sys.argv[3]
except IndexError:
    outputpath = None
# Emit one melt command spread over several lines; every line but the last
# ends in a backslash so the shell joins them into a single command.
print('melt \\')
titles = srtsplit(sys.stdin.read())
for (tdata, tbody) in titles:
    if tdata is None:
        # Text in front of the first timecode: there is no clip to cut.
        continue
    if not tdata.get('end'):
        # The end timecode is optional in the SRT pattern, but a clip needs
        # both an in and an out point.
        sys.stderr.write("srt2melt: skipping entry without end timecode (start %s)\n" % tdata.get('start'))
        continue
    start = timecode.timecode_tosecs(tdata.get('start'))
    end = timecode.timecode_tosecs(tdata.get('end'))
    # melt addresses clips in frames, so seconds are scaled by the framerate.
    print('"%s" in=%d out=%d \\' % (moviepath, int(start*framerate), int(end*framerate)))
if outputpath:
    # Quote the output path the same way as the movie path so that a path
    # containing spaces stays a single shell word.
    print('-consumer avformat:"%s"' % outputpath)
srt.py[edit]
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re
# Matches one SRT timecode header.  Named groups:
#   titlenumber -- optional run of digits on the line before the timecode
#   start       -- required start timecode
#   end         -- optional end timecode following "-->"
# In each timecode the hours field is optional and the fraction, when present,
# is 1-3 digits after "," or ".".  Compiled with re.X (whitespace inside the
# pattern is ignored) and re.M (^ and $ match at line boundaries).
srttimecode_pattern = re.compile(
r"""^
(?# Timecode )
(^
((?P<titlenumber>\d+)\r?\n)?
(?P<start> ((\d\d):)? (\d\d): (\d\d) ([,.]\d{1,3})?)
\s* --> \s*
(?P<end> ((\d\d):)? (\d\d): (\d\d) ([,.]\d{1,3})?)?
\s*)
$""",
re.X|re.M
)
def split (text, pat, removeBlankHead=True):
    """Cut text at every match of pat and pair each match with what follows.

    Returns zip() of (head, body) pairs: head is the groupdict() of a match
    and body is the text from the end of that match up to the next one.  The
    text before the first match is paired with a head of None; that leading
    pair is left out when it is empty and removeBlankHead is true.
    """
    heads = [None]
    bodies = []
    cursor = 0
    for match in pat.finditer(text):
        # Everything since the previous match belongs to the previous head.
        bodies.append(text[cursor:match.start()])
        heads.append(match.groupdict())
        cursor = match.end()
    # Whatever follows the last match is the body of the last head.
    bodies.append(text[cursor:])
    if bodies[0] != "" or not removeBlankHead:
        return zip(heads, bodies)
    return zip(heads[1:], bodies[1:])
def srtsplit (text, removeBlankHead=True):
    """Split SRT text into (head, body) pairs at every timecode line.

    text            -- contents of an SRT subtitle file
    removeBlankHead -- forwarded to split(); when true, an empty chunk in
                       front of the first timecode is dropped

    Returns the result of split() using srttimecode_pattern.
    """
    # The flag used to be accepted but never passed on, so callers could not
    # switch the blank-head removal off.
    return split(text, srttimecode_pattern, removeBlankHead)
if __name__ == "__main__":
    # Debugging aid: parse SRT text from standard input and pretty-print the
    # (head, body) pairs that srtsplit produces.
    import sys
    from pprint import pprint
    parsed = srtsplit(sys.stdin.read())
    pprint(parsed)
timecode.py[edit]
# This file is part of Active Archives.
# Copyright 2006-2010 the Active Archives contributors (see AUTHORS)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Also add information on how to contact you by electronic and paper mail.
import math, re
# Timecode in [HH:]MM:SS[.fff] form.  Groups: (1) optional hours, (2) minutes,
# (3) seconds, (4) optional fractional digits after "." or ",".
# An earlier version of the pattern required the hours field:
# timecode_pat = re.compile(r"(\d+):(\d+):(\d+)(?:[.,](\d+))?")
timecode_pat = re.compile(r"(?:(\d+):)?(\d+):(\d+)(?:[.,](\d+))?")
def timecode_fromsecs(rawsecs, fract=True, alwaysfract = False, fractdelim = ',', alwayshours=False):
    """Format a number of seconds as a [HH:]MM:SS[,mmm] timecode string.

    rawsecs     -- time in seconds (int or float)
    fract       -- when true, append a three-digit fraction if rawsecs is not
                   a whole number or alwaysfract is true; when false, round
                   to the nearest whole second
    alwaysfract -- append the fraction even when it is zero
    fractdelim  -- separator placed between seconds and fraction
    alwayshours -- include the hours field even when it is zero

    Rounding happens on the total before it is split into fields, so a
    rounded-up value carries into the next field (59.9996 -> "01:00,000")
    instead of producing "00:59,000" or a seconds field of 60.
    """
    show_fract = bool(fract) and (alwaysfract or rawsecs != math.floor(rawsecs))
    if show_fract:
        # Work in whole milliseconds so the fraction can carry into seconds.
        total_ms = int(round(rawsecs * 1000))
    else:
        total_ms = int(round(rawsecs)) * 1000
    whole_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(whole_secs, 60)
    hours, mins = divmod(total_mins, 60)
    if hours or alwayshours:
        stamp = "%02d:%02d:%02d" % (hours, mins, secs)
    else:
        stamp = "%02d:%02d" % (mins, secs)
    if show_fract:
        stamp += "%s%03d" % (fractdelim, millis)
    return stamp
def timecode_tosecs(tcstr):
    """Convert a [HH:]MM:SS[.fff] timecode string to a number of seconds.

    tcstr -- string searched for the first match of timecode_pat, or None

    Returns an int when the timecode has no fractional part, a float when it
    has one, and None when tcstr is None or contains no timecode.
    """
    # A missing timecode (for example the optional end time of an SRT entry)
    # is reported like an unparsable one instead of raising TypeError.
    if tcstr is None:
        return None
    found = timecode_pat.search(tcstr)
    if not found:
        return None
    hours, mins, secs, fraction = found.groups()
    total = 60 * int(mins) + int(secs)
    if hours:
        total += 3600 * int(hours)
    if fraction:
        # Re-attach the fractional digits textually so that they are read
        # exactly as written (",5" means .5 and ",05" means .05).
        total = float("%d.%s" % (total, fraction))
    return total
def parse2secs (val):
    """Return val as seconds, accepting a plain number or a timecode string.

    val is first tried as a float; if float() raises ValueError it is handed
    to timecode_tosecs() instead.
    """
    # NOTE: None is not accepted -- float(None) raises TypeError, which is
    # deliberately not caught here and propagates to the caller.
    try:
        seconds = float(val)
    except ValueError:
        seconds = timecode_tosecs(val)
    return seconds
if __name__ == "__main__":
    # Manual round-trip check: format each sample with and without the
    # fractional part, parse the result back, and print all three values.
    def t(x):
        for with_fraction in (True, False):
            s = timecode_fromsecs(x, with_fraction, False)
            print("%s => %s => %s" % (x, s, timecode_tosecs(s)))

    for sample in (0, 59.666666666666666, 60, 60.0, 1235/3.0, 10000.5):
        t(sample)