#!/usr/bin/env python
# encoding: utf-8
"""Look up LibreOffice releases on the Document Foundation download servers.

The script scrapes the HTML directory listings served at ``archiveurl`` and
``baseurl``.  When a version is given as the first command-line argument it
is normalised to a full release number; afterwards the ``stable/`` channel
is walked to collect the published versions and inspect their deb packages.

NOTE(review): several branches were unfinished in the original source and
are kept as explicit ``TODO`` stubs instead of guessing at their behaviour.
"""

import re
import sys
import urllib.request

from lxml import etree

archiveurl = "https://downloadarchive.documentfoundation.org/libreoffice/old/"
baseurl = "https://download.documentfoundation.org/libreoffice/"


def _directory_links(url):
    """Return the ``<a>`` elements found inside table cells of the page at *url*."""
    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(url) as response:
        page = response.read()
    return etree.HTML(page).xpath('//td/a')


def _version_key(version):
    """Return a sort key that orders dotted version strings numerically.

    Numeric components compare as integers (so ``7.0.10`` sorts after
    ``7.0.9``); non-numeric components such as ``alpha1`` compare as strings
    and sort after any numeric component in the same position.
    """
    return [
        (0, int(part)) if part.isdigit() else (1, part)
        for part in version.split('.')
    ]


def getlatestrel(basever):
    """Search in downloadarchive the latest version matching the base version.

    Args:
        basever: Leading part of a dotted version number, e.g. ``"7.1"``.

    Returns:
        The highest directory name listed at ``archiveurl`` that equals
        *basever* or extends it with further dot-separated components.

    Raises:
        ValueError: If no listed entry matches *basever*.
    """
    prefix = basever.rstrip('.')
    matching = set()
    for link in _directory_links(archiveurl):
        # The xpath yields elements, not strings: the name is the link text.
        name = (link.text or '').strip().strip('/')
        # Match on component boundaries so that "7.1" does not pick "7.10.x".
        if name == prefix or name.startswith(prefix + '.'):
            matching.add(name)

    if not matching:
        raise ValueError(
            "no release matching {!r} found at {}".format(basever, archiveurl)
        )

    return max(matching, key=_version_key)


if len(sys.argv) > 1:
    # A version has been specified.
    version = sys.argv[1]
    if '.' in version:
        # Numbered version.
        # Anything that is not a 4 dotted release is resolved to the latest
        # matching release found in the archive.
        if len(version.split('.')) != 4:
            version = getlatestrel(version)
    else:
        # String version (named channel).
        if version == 'still':
            pass  # TODO: branch body was empty in the original source.
        elif version == 'fresh':
            pass  # TODO: branch body was empty in the original source.
        elif version == 'daily':
            pass  # TODO: branch body was empty in the original source.

# Running for stable channel: process the links of the first page.
stable_versions = []
for link in _directory_links(baseurl + 'stable/'):
    if link.text == "Parent Directory":
        continue
    stable_versions.append(link.get('href').replace('/', ''))

    # Getting URL for specific versions.
    for arch in ['x86', 'x86_64']:
        newurl = baseurl + 'stable/' + link.get('href') + 'deb/' + arch + '/'
        res = _directory_links(newurl)
        if len(res) == 1:
            # No packages provided (only one link is listed).
            continue

        # A distinct loop variable keeps the outer ``link`` intact for the
        # next architecture.
        for package in res:
            if package.text == 'Parent Directory':
                continue
            if re.search(r'deb\.tar\.gz$', package.text or ''):
                # Matches a package - good.
                # Checking for language packs and offline help
                pass  # TODO: processing was not present in the original source.