git.sesse.net Git - vlc/blobdiff - share/lua/playlist/youtube.lua
youtube.lua: decode uri/xml-chars on name
[vlc] / share / lua / playlist / youtube.lua
index 8823925658587bafdf08b911ad534245e991f4e3..d7b5e7bd1c3314b2a2cc1910e4ccec13f4a72afe 100644 (file)
@@ -59,12 +59,15 @@ end
 function parse()
     if string.match( vlc.path, "watch%?v=" )
     then -- This is the HTML page's URL
+        -- fmt is the format of the video: 18 is HQ (mp4)
+        fmt = get_url_param( vlc.path, "fmt" )
         while true do
             -- Try to find the video's title
             line = vlc.readline()
             if not line then break end
             if string.match( line, "<meta name=\"title\"" ) then
                 _,_,name = string.find( line, "content=\"(.-)\"" )
+                name = vlc.strings.resolve_xml_special_chars( name )
             end
             if string.match( line, "<meta name=\"description\"" ) then
                -- Don't ask me why they double encode ...
@@ -84,35 +87,72 @@ function parse()
                 _,_,t = string.find( line, "\"t\": \"(.-)\"" )
                 -- vlc.msg.err( t )
                 -- video_id = string.gsub( line, ".*&video_id:'([^']*)'.*", "%1" )
+                fmt_url_map = string.match( line, "\"fmt_url_map\": \"(.-)\"" )
+                if fmt_url_map then
+                    for itag,url in string.gmatch( fmt_url_map, "(%d+)|([^,]+)" ) do
+                        -- Apparently formats are listed in quality order,
+                        -- so we can afford to simply take the first one
+                        if not fmt or tonumber( itag ) == tonumber( fmt ) then
+                            path = url
+                            break
+                        end
+                    end
+                end
             -- Also available on non-HTML5 pages: var swfHTML = (isIE) ? "<object [...]><param name=\"flashvars\" value=\"rv.2.thumbnailUrl=http%3A%2F%2Fi4.ytimg.com%2Fvi%2F3MLp7YNTznE%2Fdefault.jpg&rv.7.length_seconds=384 [...] &video_id=OHVvVmUNBFc [...] &t=OEgsToPDskK3zO44y0QN8Fr5ZSAZwCQp [...]
             elseif string.match( line, "swfHTML" ) and string.match( line, "video_id" ) then
                 _,_,t = string.find( line, "&t=(.-)&" )
+            -- Also available in HTML5 pages: videoPlayer.setAvailableFormat("http://v6.lscache4.c.youtube.com/videoplayback?ip=82.0.0.0&sparams=id%2Cexpire%2Cip%2Cipbits%2Citag%2Calgorithm%2Cburst%2Cfactor&algorithm=throttle-factor&itag=45&ipbits=8&burst=40&sver=3&expire=1275688800&key=yt1&signature=6ED860441298D1157FF3013A5D72727F25831F09.4C196BEA9F8F9B83CE678D79AD918B83D5E98B46&factor=1.25&id=7117715cf57d18d4", "video/webm; codecs=&quot;vp8.0, vorbis&quot;", "hd720");
+            elseif string.match( line, "videoPlayer%.setAvailableFormat" ) then
+                url,itag = string.match( line, "videoPlayer%.setAvailableFormat%(\"(.-itag=(%d+).-)\",.+%)" )
+                if url then
+                    -- For now, WebM formats are listed only in the HTML5
+                    -- section, which is itself present only when HTML5 is enabled.
+                    -- Format 45 is 720p, and 43 is lower resolution.
+                    if tonumber( itag ) == 45  or ( tonumber( itag ) == 43 and not webm_path ) then
+                        webm_path = url
+                    end
+                    -- Grab something if fmt_url_map failed
+                    if not path and ( not fmt or tonumber( itag ) == tonumber( fmt ) ) then
+                        path = url
+                    end
+                end
             end
-            if name and description and artist --[[and video_id]] then break end
         end
+
         if not video_id then
             video_id = get_url_param( vlc.path, "v" )
         end
-        if not base_yt_url then
-            base_yt_url = "http://youtube.com/"
-        end
         arturl = get_arturl( vlc.path, video_id )
-        -- fmt is the format of the video: 18 is HQ (mp4)
-        fmt = get_url_param( vlc.path, "fmt" )
-        if fmt then
-            format = "&fmt=" .. fmt
-        else
-            format = ""
+
+        if not fmt then
+            -- Prefer WebM formats if this is an &html5=True URL
+            html5 = get_url_param( vlc.path, "html5" )
+            if html5 == "True" and webm_path then
+                path = webm_path
+            end
         end
-        if t then
-            return { { path = base_yt_url .. "get_video?video_id="..video_id.."&t="..t..format; name = name; description = description; artist = artist; arturl = arturl; options = options } }
-        else
-            -- This shouldn't happen ... but keep it as a backup.
-            return { { path = "http://www.youtube.com/v/"..video_id; name = name; description = description; artist = artist; arturl = arturl; options=options } }
+
+        if not path then
+            if not base_yt_url then
+                base_yt_url = "http://youtube.com/"
+            end
+            if fmt then
+                format = "&fmt=" .. fmt
+            else
+                format = ""
+            end
+
+            if t then
+                path = base_yt_url .. "get_video?video_id="..video_id.."&t="..t..format
+            else
+                -- This shouldn't happen ... but keep it as a backup.
+                path = "http://www.youtube.com/v/"..video_id
+            end
         end
+        return { { path = path; name = name; description = description; artist = artist; arturl = arturl; options = options } }
     else -- This is the flash player's URL
         if string.match( vlc.path, "title=" ) then
-            name = get_url_param( vlc.path, "title" )
+            name = vlc.strings.decode_uri(get_url_param( vlc.path, "title" ))
         end
         video_id = get_url_param( vlc.path, "video_id" )
         arturl = get_arturl( vlc.path, video_id )