#!/usr/bin/env python
# encoding: utf-8

import urllib.request

from lxml import etree
from packaging.version import parse as parse_version


class BuildVersion(object):
    """Resolve a LibreOffice version query to a version and its download URLs.

    The query is either a dotted version number (``'7.0'``, ``'7.0.4.2'``) or
    a branch name (``'daily'``, ``'prerelease'``, ``'still'``, ``'fresh'``).
    Any other dot-less query is handled like ``'fresh'``.

    After construction:

    * ``query`` is the original query string.
    * ``version`` is the resolved version string, or a list of version
      strings for the ``'prerelease'`` branch (possibly empty).
    * ``basedirurl`` is a dict with the keys ``'x86'`` and ``'x86_64'``
      mapping to a base URL (``'-'`` when that architecture is not offered),
      or a list of such dicts parallel to ``version`` for ``'prerelease'``.

    Everything except a 4-dotted version query needs network access.
    """

    DOWNLOADPAGE = "https://www.libreoffice.org/download/download/"
    ARCHIVE = "https://downloadarchive.documentfoundation.org/libreoffice/old/"
    RELEASE = "https://download.documentfoundation.org/libreoffice/stable/"
    DAILY = "https://dev-builds.libreoffice.org/daily/master/Linux-rpm_deb-x86_64@tb87-TDF/"
    PRERELEASE = "https://dev-builds.libreoffice.org/pre-releases/deb/x86_64/"

    def __init__(self, query):
        """Resolve *query* and populate ``version`` and ``basedirurl``.

        Raises:
            IndexError: if no archived release matches a partial version, or
                a scraped page does not contain the expected entries.
            urllib.error.URLError: if a page cannot be fetched.
        """
        self.query = query
        self.version = ''
        self.basedirurl = {}

        if '.' in self.query:
            # Numbered version: a 4-dotted release is used verbatim, anything
            # shorter is expanded to the latest matching 4-dotted release.
            if len(self.query.split('.')) == 4:
                self.version = self.query
            else:
                self.version = self.__getlatestrel(self.query)
            self.basedirurl = self.__getbaseurl(self.version)
            return

        # Named branch.
        release = self.__getbranchrel(self.query)
        if isinstance(release, list):
            # Several releases were found (prereleases). Build the lists
            # directly: assigning by index into an empty list raises
            # IndexError.
            self.version = [self.__getlatestrel(entry['version']) for entry in release]
            self.basedirurl = [entry['basedirurl'] for entry in release]
        else:
            self.version = release['version']
            self.basedirurl = release['basedirurl']

    @staticmethod
    def _fetch_tree(url):
        """Download *url* and return it parsed as an lxml HTML tree."""
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as response:
            return etree.HTML(response.read())

    @staticmethod
    def _version_key(name):
        """Return a sort key ordering version strings numerically."""
        try:
            return (1, parse_version(name), name)
        except ValueError:
            # packaging raises InvalidVersion (a ValueError subclass) for
            # names it cannot parse; rank those below every parsable version.
            return (0, name)

    @staticmethod
    def _latest_matching(names, basever):
        """Return the highest version among *names* that starts with *basever*.

        *names* are directory-listing entries; ``None`` entries are skipped
        and surrounding slashes are stripped. Matching is a plain string
        prefix test.

        Raises:
            IndexError: if no entry matches.
        """
        candidates = {
            name.strip('/') for name in names
            if name and name.startswith(basever)
        }
        if not candidates:
            raise IndexError(
                'no archived release matches {!r}'.format(basever))
        # Compare as versions, not as strings: lexicographically
        # '7.0.9.1' would sort after '7.0.10.1'.
        return max(candidates, key=BuildVersion._version_key)

    def __getlatestrel(self, basever):
        """Search the download archive for the latest version matching *basever*."""
        links = self._fetch_tree(BuildVersion.ARCHIVE).xpath('//td/a')
        return self._latest_matching([link.text for link in links], basever)

    def __getbranchrel(self, branch):
        """Based on the branch name, get the release number(s).

        Returns a dict with the keys ``'version'`` and ``'basedirurl'``, or,
        for ``'prerelease'``, a (possibly empty) list of such dicts.
        """
        if branch == 'daily':
            # Daily builds are mostly distinguished by the day of build (the
            # official version is constant). The last built version is the
            # next-to-last entry [-2] on the page.
            fulldailypath = self._fetch_tree(BuildVersion.DAILY).xpath('//td/a')[-2].text
            dailyversion = fulldailypath.split('_')[0].replace('-', '')
            newurl = '/'.join([BuildVersion.DAILY, fulldailypath, ''])
            basedirurl = {'x86_64': newurl, 'x86': '-'}
            # The version is the second '_'-separated field of the second
            # link on the build's own page.
            version = self._fetch_tree(newurl).xpath('//td/a')[1].text.split('_')[1]
            return {'version': version + '-' + dailyversion, 'basedirurl': basedirurl}

        if branch == 'prerelease':
            # Rely on the download page, which cites prerelease versions
            # whenever some are available.
            versions = self._fetch_tree(BuildVersion.DOWNLOADPAGE).xpath(
                '//p[@class="lead_libre"][last()]/following-sibling::ul[last()]/li/a/text()')
            basedirurl = {'x86': '-', 'x86_64': BuildVersion.PRERELEASE}
            return [{'version': v, 'basedirurl': basedirurl} for v in versions]

        # Stable releases. The download page is used instead of the RELEASE
        # directory listing because the order of that listing is not reliable.
        versions = self._fetch_tree(BuildVersion.DOWNLOADPAGE).xpath(
            '//span[@class="dl_version_number"]')
        # 'still' is the second version shown on the page; 'fresh' (and any
        # other branch name) is the first.
        index = 1 if branch == 'still' else 0
        version = self.__getlatestrel(versions[index].text)
        return {'version': version, 'basedirurl': self.__getbaseurl(version)}

    def __getbaseurl(self, version):
        """Return the download base URLs for the numeric *version*.

        The result maps ``'x86'`` and ``'x86_64'`` to the matching ``deb``
        directory in the download archive; ``'x86'`` is ``'-'`` when 32-bit
        binaries are not offered.
        """
        # ARCHIVE already ends with '/'; normalise so the URL has no '//'.
        url = BuildVersion.ARCHIVE.rstrip('/') + '/' + version + '/deb/'
        basedirurl = {}
        # x86 binaries are no longer offered from 6.3.0 onwards.
        if parse_version(version) < parse_version('6.3.0'):
            basedirurl['x86'] = url + 'x86/'
        else:
            basedirurl['x86'] = '-'
        basedirurl['x86_64'] = url + 'x86_64/'
        return basedirurl