diff --git a/loaih/versions.py b/loaih/versions.py
index dc90f17..ade3fbc 100644
--- a/loaih/versions.py
+++ b/loaih/versions.py
@@ -4,119 +4,176 @@ import urllib.request
 from lxml import etree
 from packaging.version import parse as parse_version
 
+import datetime
 
-class BuildVersion(object):
+class Definitions(object):
+    """Endpoints and XPath selectors used to look up LibreOffice builds."""
     DOWNLOADPAGE = "https://www.libreoffice.org/download/download/"
     ARCHIVE = "https://downloadarchive.documentfoundation.org/libreoffice/old/"
     RELEASE = "https://download.documentfoundation.org/libreoffice/stable/"
     DAILY = "https://dev-builds.libreoffice.org/daily/master/Linux-rpm_deb-x86_64@tb87-TDF/"
     PRERELEASE = "https://dev-builds.libreoffice.org/pre-releases/deb/x86_64/"
 
+    SELECTORS = {
+        'still': {
+            'URL': DOWNLOADPAGE,
+            'xpath': '(//span[@class="dl_version_number"])[last()]/text()'
+        },
+        'fresh': {
+            'URL': DOWNLOADPAGE,
+            'xpath': '(//span[@class="dl_version_number"])[1]/text()'
+        },
+        'prerelease': {
+            'URL': DOWNLOADPAGE,
+            'xpath': '//p[@class="lead_libre"][last()]/following-sibling::ul[last()]/li/a/text()'
+        },
+        'daily': {
+            'URL': DAILY,
+            'xpath': '//td/a'
+        }
+    }
+
+class Base(object):
+    # Class for static methods which might be useful even outside the build
+    # scripts.
+
+    @staticmethod
+    def _fetch(url):
+        """Downloads the page at url and returns it parsed as an HTML tree."""
+        # The response is closed as soon as its body has been read.
+        with urllib.request.urlopen(url) as response:
+            return etree.HTML(response.read())
+
+    @staticmethod
+    def dailyver(date = None):
+        """Returns the versions present on the daily build of date (today when omitted)."""
+        url = Base.dailyurl(date)['x86_64']
+        # Since it is possible that a daily build is not yet provided...
+        if url == '-':
+            return []
+
+        # Rerun the page parsing, this time to find out the versions built.
+        # The URL resolved above is reused instead of being looked up again.
+        b = Base._fetch(url).xpath("//td/a[contains(text(), '_deb.tar.gz')]/text()")
+        # This should have returned the main package for a version, but can
+        # have returned multiple ones, so let's treat it as a list
+        return [ x.split('_')[1] for x in b ]
+
+    @staticmethod
+    def dailyurl(date = None):
+        """Returns the URLs, keyed by architecture, of the latest daily build of date (today when omitted)."""
+        # As per other parts of the build, we need to maintain an URL also for
+        # x86 versions, even though they aren't really provided.
+        # As such, the return value must be a dictionary
+
+        # The default date is resolved here, at call time: a default written
+        # in the signature is computed only once, when the module is imported.
+        if date is None:
+            date = datetime.datetime.today()
+
+        # Get the anchor for the builds of that date
+        a = Base._fetch(Definitions.DAILY).xpath("//td/a[contains(text(), '" + date.strftime('%Y-%m-%d') + "')]/text()")
+        if not a:
+            # No results found, no version found, let's return placeholders
+            return { 'x86': '-', 'x86_64': '-' }
+
+        # On the contrary, more than a version is found. let's order the
+        # list and get the latest item
+        return { 'x86': '-', 'x86_64': Definitions.SELECTORS['daily']['URL'] + sorted(a)[-1] }
+
+    @staticmethod
+    def namedver(query):
+        """Gets the list of versions matching a named query (a SELECTORS key, or 'yesterday')."""
+
+        if query in ('daily', 'yesterday'):
+            # Daily needs double parsing for the same result to apply.
+            # We first select today's build anchor:
+            date = datetime.datetime.today()
+            if query == 'yesterday':
+                # Use yesterday's date for testing purposes.
+                date += datetime.timedelta(days=-1)
+            return Base.dailyver(date)
+
+        # In case the query isn't for daily
+        selector = Definitions.SELECTORS[query]
+        return Base._fetch(selector['URL']).xpath(selector['xpath'])
+
+    @staticmethod
+    def fullversion(version):
+        """Get latest full version from Archive based on partial version."""
+        versionlist = Base._fetch(Definitions.ARCHIVE).xpath("//td/a[starts-with(text(), '" + version + "')]/text()")
+        # Compare as versions, not as plain strings: as text, '7.0.10.1'
+        # would sort before '7.0.9.1'.
+        cleanlist = sorted([ x.strip('/') for x in versionlist ], key=parse_version)
+
+        # Sorting, then returning the last version (IndexError if nothing matched)
+        return cleanlist[-1]
+
+    @staticmethod
+    def urlfromqueryandver(query, version):
+        """Returns the fetching URL based on the queried version and the numeric version of it."""
+        # This has the purpose to simplify and explain how the releases are
+        # laid out.
+
+        # If the query tells about daily or 'yesterday' (for testing purposes),
+        # we might ignore versions and return the value coming from dailyurl:
+        if query == 'daily':
+            return Base.dailyurl()
+        if query == 'yesterday':
+            date = datetime.datetime.today() + datetime.timedelta(days=-1)
+            return Base.dailyurl(date)
+
+        # All other versions will be taken from Archive, as such we need a full
+        # version.
+
+        # If the version has fewer than four dot-separated parts, that's not a full version and we will call Base.fullversion()
+        fullversion = version
+        if len(version.split('.')) < 4:
+            fullversion = Base.fullversion(version)
+
+        # So the final URL is the Archive one, plus the full versions, plus a
+        # final '/deb/' - and an arch subfolder
+        baseurl = Definitions.ARCHIVE + fullversion + '/deb/'
+        retval = {}
+
+        # x86 binaries are not anymore offered after 6.3.0.
+        if parse_version(version) < parse_version('6.3.0'):
+            retval['x86'] = baseurl + 'x86/'
+        else:
+            retval['x86'] = '-'
+
+        retval['x86_64'] = baseurl + 'x86_64/'
+
+        return retval
+
+
+class Build(object):
+
     def __init__(self, query):
+        """Resolves query (a version number or a name) into a full version and its download URLs."""
         self.query = query
         self.version = ''
         self.basedirurl = {}
 
-        # Parsing the query input.
-        if '.' in self.query:
-            # Numbered self.version. Let's check it is a 4 dotted release
-            if len(self.query.split('.')) == 4:
-                self.version = self.query
-            else:
-                # If not 4 dotted, let's search for the 4 dotted version
-                self.version = self.__getlatestrel(self.query)
-
-            self.basedirurl = self.__getbaseurl(self.version)
-        else:
-            # String self.versions.
-            a = self.__getbranchrel(self.query)
+        # For simplification process, let's before see if there's a named query
+        # (it shouldn't)
+        if '.' not in self.query:
+            # Named version. Let's get the result of the query.
+            a = Base.namedver(self.query)
 
-            if isinstance(a, list):
-                # If it's a list, a number of releases are provided.
-                self.version = []
-                self.basedirurl = []
-                for i in range(len(a)):
-                    self.version[i] = self.__getlatestrel(a[i]['version'])
-                    self.basedirurl[i] = a[i]['basedirurl']
-
-            else:
-                self.version = a['version']
-                self.basedirurl = a['basedirurl']
-
-    def __getlatestrel(self, basever):
-        """Search in downloadarchive for the latest version matching baseversion."""
-        versionlist = etree.HTML(urllib.request.urlopen(BuildVersion.ARCHIVE).read()).xpath('//td/a')
-        # Getting a more polished matching list
-        cleanlist = list(dict.fromkeys([x.text.strip('/') for x in versionlist if x.text.startswith(basever)]))
-
-        # Sorting, then returning the last version
-        return sorted(cleanlist)[-1]
-
-    def __getbranchrel(self, branch):
-        """Based on branch names, get the release number."""
-        basedirurl = {}
-        version = ''
-        if branch == 'daily':
-            # The daily builds can be mostly distinguished by the day of build
-            # (official version is constant.
-
-            # The last built version is the next-to-last version [-2] on the page.
-            fulldailypath = etree.HTML(urllib.request.urlopen(BuildVersion.DAILY).read()).xpath('//td/a')[-2].text
-            dailyversion = fulldailypath.split('_')[0].replace('-', '')
-            version
-            newurl = str.join('/', [ BuildVersion.DAILY, fulldailypath, '' ])
-
-            basedirurl = { u'x86_64': newurl, u'x86': '-' }
-            version = etree.HTML(urllib.request.urlopen(newurl).read()).xpath('//td/a')[1].text.split('_')[1]
-
-            return { 'version': version + '-' + dailyversion, 'basedirurl': basedirurl }
-
-        if branch == 'prerelease':
-            # Also here, we'll rely on DownloadPage. Whenever a prerelease version is cited in the page.
-            version = etree.HTML(urllib.request.urlopen(BuildVersion.DOWNLOADPAGE).read()).xpath('//p[@class="lead_libre"][last()]/following-sibling::ul[last()]/li/a/text()')
-            retval = []
-            basedirurl = { u'x86': '-', u'x86_64': BuildVersion.PRERELEASE }
-
-            if len(version) == 0:
-                return retval
-
-            for v in version:
-                retval.append({ 'version': v, 'basedirurl': basedirurl })
-
-            return retval
-
-        # Stable releases.
-        # Old approach - Doesn't really work because RelEng can screw order.
-        #versions = etree.HTML(urllib.request.urlopen(BuildVersion.RELEASE).read()).xpath('//td/a')
-        #index = 1
-        #if branch == 'still':
-        #    index = -2
-        #elif branch == 'fresh':
-        #    index = -1
-        #version = self.__getlatestrel(versions[index].text.strip('/'))
-
-        # Now I'll rely on DownloadPage
-        versions = etree.HTML(urllib.request.urlopen(BuildVersion.DOWNLOADPAGE).read()).xpath('//span[@class="dl_version_number"]')
-        index = 0
-        if branch == 'still':
-            index = 1
-        elif branch == 'fresh':
-            index = 0
-        version = self.__getlatestrel(versions[index].text)
-
-        return { 'version': version, 'basedirurl': self.__getbaseurl(version) }
-
-    def __getbaseurl(self, version):
-        """Returns the links based on the numeric version."""
-        basedirurl = {}
-        url = BuildVersion.ARCHIVE + '/' + version + '/deb/'
-
-        # x86 binaries are not anymore offered after 6.3.0.
-        if parse_version(version) < parse_version('6.3.0'):
-            basedirurl[u'x86'] = url + 'x86/'
-        else:
-            basedirurl[u'x86'] = '-'
+            if len(a) != 1:
+                raise Exception("Queried version for build does not return a single result. Please unfold manually. Exiting.")
 
-        basedirurl[u'x86_64'] = url + 'x86_64/'
-
-        return basedirurl
+            # So if it is here, the version is one.
+            self.query = a[0]
+
+        if len(self.query.split('.')) == 4:
+            self.version = self.query
+        else:
+            # If not 4 dotted, let's search for the 4 dotted version
+            self.version = Base.fullversion(self.query)
+
+        # NOTE(review): a named query has been replaced by its looked-up
+        # version by now, so the 'daily'/'yesterday' branches of
+        # urlfromqueryandver look unreachable from here - confirm intended.
+        self.basedirurl = Base.urlfromqueryandver(self.query, self.version)
diff --git a/scripts/loaih-getversion b/scripts/loaih-getversion
index 59d03ae..e9d2c04 100644
--- a/scripts/loaih-getversion
+++ b/scripts/loaih-getversion
@@ -2,21 +2,27 @@
 # encoding: utf-8
 
 import click
-from loaih.versions import BuildVersion
+import loaih.versions as versions
 import re, sys, json
 
 @click.command()
 @click.option('-o', '--output', default = 'rundeck', type=click.Choice(['rundeck', 'json', 'text' ], case_sensitive=False), help="Output format, defaulting to Rundeck Key/Value data format. Options: rundeck,json,text")
 @click.argument('query')
 def getversion(query, output):
-    b = BuildVersion(query)
+    b = []
+    if '.' in query:
+        b.append(versions.Build(query))
+    else:
+        # In case of names, we might want to loop for versions. So we before
+        # get the versions available, based on query.
+        vers = versions.Base.namedver(query)
+        if len(vers) != 1:
+            for v in vers:
+                b.append(versions.Build(v))
+        else:
+            b = versions.Build(vers[0])
 
-    if output.lower() == 'rundeck':
-        print("""RUNDECK:DATA: query = {query}
-RUNDECK:DATA: version = {version}
-RUNDECK:DATA: x86 = {x86_url}
-RUNDECK:DATA: x86_64 = {x86_64_url}""".format(query = query, version = b.version, x86_url = b.basedirurl['x86'], x86_64_url = b.basedirurl['x86_64']))
-    elif output.lower() == 'json':
+    if output.lower() == 'json':
         output = {
             'query': query,
             'version': b.version,