2 Get information about a movie from IMDb
4 Copyright © 2009-2010 VideoLAN and AUTHORS
6 Authors: Jean-Philippe André (jpeg@videolan.org)
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 -- TODO: Use simplexml module to simplify parsing
-- Search results shared by the results parser and the "Open" handler:
-- an array of { id, title, year, link } records
titles = {}

-- Some global variables: widgets. Each one is nil until it has been created.
--   txt          text field
--   list         list widget
--   button_open  button widget
--   html         rich text (HTML) widget
--   waitlbl      text label widget
txt, list, button_open, html, waitlbl = nil, nil, nil, nil, nil
-- Script descriptor, called when the extensions are scanned.
-- Returns the table describing this extension: display title, author,
-- home page, short description, long (HTML) description and the list of
-- requested capabilities.
function descriptor()
    return {
        title = "IMDb - The Internet Movie Database",
        author = "Jean-Philippe André",
        url = 'http://www.imdb.org/',
        shortdesc = "The Internet Movie Database",
        description = "<center><b>The Internet Movie Database</b></center><br />"
                   .. "Get information about movies from the Internet "
                   .. "Movie Database (IMDb).<br />This Extension will show "
                   .. "you the cast, a short plot summary and a link to "
                   .. "the web page on imdb.org.",
        -- NOTE(review): "input-listener" presumably makes VLC call
        -- input_changed() when the playing item changes; confirm in the
        -- VLC Lua extensions documentation
        capabilities = { "input-listener" },
    }
end
-- Remove leading & trailing whitespace.
-- Returns exactly one value: the trimmed string, or "" when str is nil/false.
function trim(str)
    if not str then return "" end
    -- "(.-)" is the shortest run that still lets "%s*$" consume the trailing
    -- whitespace. The outer parentheses drop gsub's second result (the
    -- substitution count) so callers forwarding the result get one value.
    return (string.gsub(str, "^%s*(.-)%s*$", "%1"))
end
-- Update title text field. Removes file extensions.
-- Reads the name of the current input item, strips one trailing
-- ".<alphanumerics>" suffix, trims it and writes it into the `txt` widget.
-- Does nothing when there is no input item or the item has no name.
function update_title()
    local item = vlc.input.item()
    local name = item and item:name()
    if not name then return end

    -- gsub also returns a match count; the parentheses keep only the string
    name = (string.gsub(name, "(.*)(%.%w+)$", "%1"))
    -- Same here: hand set_text a single argument whatever trim returns
    txt:set_text((trim(name)))
end
71 -- Function called when the input (media being read) changes
72 function input_changed()
76 -- First function to be called when the extension is activated
81 -- This function is called when the extension is disabled
-- Create the main dialog with a simple search bar: a label, a text input
-- pre-filled with the current input item's name (empty when nothing is
-- loaded) and a "Search" button wired to click_okay.
-- Stores the dialog in the global `dlg` and the text input in `txt`.
function create_dialog()
    dlg = vlc.dialog("IMDb")
    dlg:add_label("<b>Movie Title:</b>", 1, 1, 1, 1)

    local item = vlc.input.item()
    local initial = item and item:name() or ""
    txt = dlg:add_text_input(initial, 2, 1, 1, 1)
    dlg:add_button("Search", click_okay, 3, 1, 1, 1)

    -- Show, if not already visible
    dlg:show()
end
98 -- Deactivate this extension
-- Called when the user presses the "Search" button.
-- Builds the IMDb search URL from the text field, removes the widgets left
-- over from a previous search, downloads the page and hands it to
-- parse_moviepage (direct hit) or parse_resultspage (list of candidates).
-- Sets the globals `title` and `url`, which the parsers read.
function click_okay()
    local query = txt:get_text()
    vlc.msg.dbg("[IMDb] Searching for " .. query)

    -- Search IMDb: build URL. Punctuation, whitespace and control characters
    -- act as separators; a run of separators collapses into one space for
    -- display and one "+" in the query string.
    title = string.gsub(string.gsub(query, "[%p%s%c]", "+"), "%++", " ")
    url = "http://www.imdb.com/find?s=all&q=" .. string.gsub(title, " ", "+")

    -- Recreate dialog structure: delete useless widgets
    if html then
        dlg:del_widget(html)
        html = nil
    end
    if list then
        dlg:del_widget(list)
        list = nil
    end
    if button_open then
        dlg:del_widget(button_open)
        button_open = nil
    end

    -- Ask the user to wait some time...
    local link = '<a href="' .. url .. '">' .. title .. "</a>"
    local waitmsg = 'Searching for ' .. link .. " on IMDb..."
    if waitlbl then
        waitlbl:set_text(waitmsg)
    else
        waitlbl = dlg:add_label(waitmsg, 1, 2, 3, 1)
    end
    -- Repaint now: the download below runs before this handler returns
    dlg:update()

    local failure = 'Sorry, an error occurred while searching for ' .. link
        .. ".<br />Please try again later."

    -- Download the page
    local s, msg = vlc.stream(url)
    if not s then
        -- msg may be nil; tostring keeps the warning from raising
        vlc.msg.warn("[IMDb] " .. tostring(msg))
        waitlbl:set_text(failure)
        return
    end

    local data = s:read(65000)
    if not data then
        vlc.msg.warn("[IMDb] No data received!")
        waitlbl:set_text(failure)
        return
    end

    -- Probe result & parse it (plain search: the marker is literal text)
    if string.find(data, "<h6>Overview</h6>", 1, true) then
        -- We found a direct match
        parse_moviepage(data)
    else
        -- We have a list of results to parse
        parse_resultspage(data)
    end
end
-- Called when clicked on the "Open" button.
-- Looks up the first selected row of the results list in `titles`, points
-- the globals `url` / `title` at that movie, removes the results widgets,
-- downloads the movie page and passes it to parse_moviepage.
function click_open()
    -- Get user selection
    local selection = list:get_selection()
    if not selection then return end

    -- Rows were added with their index in `titles` as id, so the first key
    -- of the selection identifies the chosen entry
    local sel = next(selection)
    if not sel then return end
    local entry = titles[sel]
    if not entry or not entry.id then return end

    -- Update information message
    url = "http://www.imdb.org/title/" .. entry.id .. "/"
    title = entry.title

    -- The results widgets are replaced by the movie page
    dlg:del_widget(list)
    list = nil
    if button_open then
        dlg:del_widget(button_open)
        button_open = nil
    end

    local link = '<a href="' .. url .. '">' .. title .. "</a>"
    waitlbl:set_text("Loading IMDb page for <a href=\"" .. url .. "\">" .. title .. "</a>.")
    -- Repaint now: the download below runs before this handler returns
    dlg:update()

    local s, msg = vlc.stream(url)
    if not s then
        waitlbl:set_text('Sorry, an error occurred while looking for ' .. link .. ".")
        -- msg may be nil; tostring keeps the warning from raising
        vlc.msg.warn("[IMDb] " .. tostring(msg))
        return
    end

    local data = s:read(65000)
    if data and string.find(data, "<h6>Overview</h6>", 1, true) then
        parse_moviepage(data)
    else
        waitlbl:set_text('Sorry, no results found for ' .. link .. ".")
    end
end
-- Parse the results page and find titles, years & URLs.
-- `data` is the HTML of a search results page. Every section introduced by a
-- "<b>... Titles ...</b>" heading is scanned for links containing "title";
-- each hit is stored in the global `titles` as
-- { id = <last path segment of the link>, title = <link text>,
--   year = <digits in the next "(...)" after the link, or nil>, link = <href> }.
-- The results are then shown in a new list widget with an "Open" button,
-- or a "no results" message is written to `waitlbl`.
function parse_resultspage(data)
    vlc.msg.dbg("[IMDb] Analysing results page")

    -- Escape every punctuation character so page text can be embedded
    -- literally inside a Lua pattern
    local function escape_pattern(s)
        return (string.gsub(s, "%p", "%%%0"))
    end

    local heading = "<b>([^<]*Titles[^<]*)</b>"

    titles = {}
    local count = 0
    local idxEnd = 1

    while idxEnd do
        -- Find the next section heading
        local _, last, titleType = string.find(data, heading, idxEnd)
        if not titleType then break end
        idxEnd = last
        local nextTitle = string.match(data, heading, idxEnd)

        -- Find current scope: the table(s) between this heading and the
        -- following one, or up to the last table when this is the final section
        local scope
        if nextTitle then
            scope = string.match(data,
                "<table>(.*)</table>.*" .. escape_pattern(nextTitle), idxEnd)
        else
            scope = string.match(data, "<table>(.*)</table>", idxEnd)
        end
        if not scope then break end

        -- Find all titles in this scope
        local pos = 1
        while pos do
            local link = string.match(scope, "<a href=\"([^\"]+title[^\"]+)\"", pos)
            if not link then break end -- this would not be normal behavior...

            -- The first anchor with this href may wrap an image; look for
            -- the one that carries text
            local thistitle
            _, pos, thistitle = string.find(scope,
                "<a href=\"" .. escape_pattern(link) .. "\"[^>]*>([^<]+)</a>", pos)
            if not thistitle then break end -- this would not be normal behavior...

            -- "%(" / "%)" match literal parentheses around the year
            local year = string.match(scope, "%((%d+)%)", pos)

            -- Add this title to the list
            count = count + 1
            titles[count] = {
                id = string.match(link, "/([^/]+)/$"),
                title = replace_html_chars(thistitle),
                year = year,
                link = link,
            }
        end
    end

    -- Did we find anything at all?
    if count == 0 then
        waitlbl:set_text('Sorry, no results found for <a href="'
                         .. url .. '">' .. title .. "</a>.")
        return
    end

    -- Sounds good, we found some results, let's display them
    waitlbl:set_text(count .. " results found for <a href=\"" .. url .. "\">" .. title .. "</a>.")
    list = dlg:add_list(1, 3, 3, 1)
    button_open = dlg:add_button("Open", click_open, 3, 4, 1, 1)

    for idx, entry in ipairs(titles) do
        local label = entry.title
        -- The year is optional: not every result is followed by "(YYYY)"
        if entry.year then
            label = label .. " (" .. entry.year .. ")"
        end
        list:add_value(label, idx)
    end
end
-- Parse a movie description page.
-- `data` is the HTML of a movie page. Extracts the page title, the canonical
-- URL, the director, the genres and the cast, shows them in a new HTML
-- widget, then downloads "<url>plotsummary" and appends the first plot
-- summary. Updates the globals `title`, `url` and `html`.
-- When the page has no canonical link, the global `url` is left as the
-- caller set it.
function parse_moviepage(data)
    -- Title & year; keep the previous title when the page has none
    title = string.match(data, "<title>(.-)</title>") or title or "(Unknown)"
    local text = "<h1>" .. title .. "</h1>"
    text = text .. "<h2>Overview</h2><table>"

    -- Real URL
    local canonical = string.match(data, "<link rel=\"canonical\" href=\"([^\"]+)\"")
    local imdbID = canonical and string.match(canonical, "/title/([^/]+)/")
    if imdbID then
        url = "http://www.imdb.org/title/" .. imdbID .. "/"
    end

    -- Director: first plain-text link after the director block
    local director
    local _, nextIdx = string.find(data, "<div id=\"director-info\"", 1, true)
    if nextIdx then
        director = string.match(data, "<a href[^>]+>([%w%s]+)</a>", nextIdx)
    end
    text = text .. "<tr><td><b>Director</b></td><td>" .. (director or "(Unknown)") .. "</td></tr>"

    -- Main genres: the first one shares the row of the "Genres" header,
    -- the following ones get a row each with an empty first cell
    local genres = {}
    for genre in string.gmatch(data, "/Sections/Genres/(%w+)/\">") do
        local lead = (#genres == 0) and "<td>" or "<tr><td /><td>"
        genres[#genres + 1] = lead .. genre .. "</td></tr>"
    end
    if #genres == 0 then
        -- Close the header row so the table stays well-formed
        genres[1] = "<td>(Unknown)</td></tr>"
    end
    text = text .. "<tr><td><b>Genres</b></td>" .. table.concat(genres)

    -- List main actors, laid out like the genres
    local actors = {}
    for nm, char in string.gmatch(data, "<td class=\"nm\"><a[^>]+>([%w%s]+)</a></td><td class=\"ddd\"> ... </td><td class=\"char\"><a[^>]+>([%w%s]+)</a>") do
        local lead = (#actors == 0) and "" or "<tr><td />"
        actors[#actors + 1] = lead .. "<td>" .. nm .. "</td><td><i>" .. char .. "</i></td></tr>"
    end
    if #actors == 0 then
        actors[1] = "<td>(Unknown)</td></tr>"
    end
    text = text .. "<tr><td><b>Cast</b></td>" .. table.concat(actors) .. "</table>"

    waitlbl:set_text("<center><a href=\"" .. url .. "\">" .. title .. "</a></center>")

    -- The results widgets, if any, make room for the movie page
    if list then
        dlg:del_widget(list)
        list = nil
    end
    if button_open then
        dlg:del_widget(button_open)
        button_open = nil
    end

    html = dlg:add_html(text .. "<br />Loading summary...", 1, 3, 3, 1)
    -- Repaint now: the download below runs before this function returns
    dlg:update()

    -- Plot summary; any failure degrades to "(Unknown)" instead of leaving
    -- the "Loading summary..." placeholder on screen
    local summary
    local s, msg = vlc.stream(url .. "plotsummary")
    if s then
        local page = s:read(65000)
        -- We read only the first summary
        summary = page and string.match(page, "<p class=\"plotpar\">([^<]+)")
    else
        -- msg may be nil; tostring keeps the warning from raising
        vlc.msg.warn("[IMDb] " .. tostring(msg))
    end

    text = text .. "<h2>Plot Summary</h2>"
    text = text .. "<p>" .. (summary or "(Unknown)") .. "</p>"
    text = text .. "<p><h2>Source IMDb</h2><a href=\"" .. url .. "\">" .. url .. "</a></p>"

    html:set_text(text)
end
344 -- Convert some HTML characters into UTF8
345 function replace_html_chars(txt)
346 if not txt then return nil end
347 -- return vlc.strings.resolve_xml_special_chars(txt)
348 for num in string.gmatch(txt, "&#x(%x+);") do
349 -- Convert to decimal (any better way?)
351 for c in string.gmatch(num, "%x") do
352 cc = string.byte(c) - string.byte("0")
353 if (cc >= 10 or cc < 0) then
354 cc = string.byte(string.lower(c)) - string.byte("a") + 10
358 txt = string.gsub(txt, "&#x" .. num .. ";", string.char(dec))