#!/usr/bin/env python
# encoding: utf-8
"""Helpers to resolve LibreOffice versions and the URLs they are fetched from."""

import datetime
import urllib.request

from lxml import etree
from packaging.version import parse as parse_version


class Definitions(object):
    """Static URLs and XPath selectors used to look up LibreOffice builds."""

    DOWNLOADPAGE = "https://www.libreoffice.org/download/download/"
    ARCHIVE = "https://downloadarchive.documentfoundation.org/libreoffice/old/"
    RELEASE = "https://download.documentfoundation.org/libreoffice/stable/"
    DAILY = "https://dev-builds.libreoffice.org/daily/master/Linux-rpm_deb-x86_64@tb87-TDF/"
    PRERELEASE = "https://dev-builds.libreoffice.org/pre-releases/deb/x86_64/"

    # For each named query: the page to fetch and the XPath expression that
    # is evaluated against it.
    SELECTORS = {
        'still': {
            'URL': DOWNLOADPAGE,
            'xpath': '(//span[@class="dl_version_number"])[last()]/text()'
        },
        'fresh': {
            'URL': DOWNLOADPAGE,
            'xpath': '(//span[@class="dl_version_number"])[1]/text()'
        },
        'prerelease': {
            'URL': DOWNLOADPAGE,
            'xpath': '//p[@class="lead_libre"][last()]/following-sibling::ul[last()]/li/a/text()'
        },
        'daily': {
            'URL': DAILY,
            'xpath': '//td/a'
        }
    }


class Base(object):
    """Static methods which might be useful even outside the build scripts."""

    @staticmethod
    def _fetch_html(url):
        """Download `url` and return its content parsed by `etree.HTML`."""
        # The context manager closes the HTTP response as soon as the body
        # has been read, instead of leaving it to the garbage collector.
        with urllib.request.urlopen(url) as response:
            return etree.HTML(response.read())

    @staticmethod
    def _version_key(version):
        """Return a sort key that compares numeric components as integers.

        Plain string sorting would rank '7.0.10.1' before '7.0.9.1'.
        Non-numeric components (e.g. 'alpha1') sort after numeric ones found
        at the same position.
        """
        return [
            (0, int(part), '') if part.isdecimal() else (1, 0, part)
            for part in version.split('.')
        ]

    @staticmethod
    def dailyver(date=None):
        """Returns versions present on the latest daily build.

        `date` is the day whose build directory is inspected; when omitted,
        the current day is used (computed at call time). Returns a list of
        version strings, empty when no build is published for that day.
        """
        url = Base.dailyurl(date)['x86_64']

        # Since it is possible that a daily build is not yet provided...
        if url == '-':
            return []

        # Parse the build directory itself, this time to find out the
        # versions built. The URL found above is reused rather than being
        # looked up a second time.
        packages = Base._fetch_html(url).xpath(
            "//td/a[contains(text(), '_deb.tar.gz')]/text()")

        # This should have returned the main package for a version, but can
        # have returned multiple ones, so let's treat it as a list.
        return [name.split('_')[1] for name in packages]

    @staticmethod
    def dailyurl(date=None):
        """Returns the URL for the latest valid daily build.

        `date` is the day to look for; when omitted, the current day is used
        (computed at call time, not once at import time). The result is a
        dictionary with the keys 'x86' and 'x86_64'; '-' marks an
        architecture for which no URL is available.
        """
        if date is None:
            date = datetime.datetime.today()

        # As per other parts of the build, we need to maintain a URL also
        # for x86 versions, even if it isn't really provided. As such, the
        # return value must be a dictionary.

        # Get the anchors for the requested day's builds.
        anchors = Base._fetch_html(Definitions.DAILY).xpath(
            "//td/a[contains(text(), '" + date.strftime('%Y-%m-%d') + "')]/text()")

        if not anchors:
            # No results found, no version found: return placeholders.
            return {'x86': '-', 'x86_64': '-'}

        # Otherwise one or more builds were found: order the list and take
        # the latest item.
        return {
            'x86': '-',
            'x86_64': Definitions.SELECTORS['daily']['URL'] + sorted(anchors)[-1]
        }

    @staticmethod
    def namedver(query):
        """Gets the version for a specific named version.

        `query` is 'daily', 'yesterday' or a key of Definitions.SELECTORS.
        Returns the list of results found, which may be empty or hold more
        than one entry.
        """
        if query == 'daily' or query == 'yesterday':
            # Daily needs double parsing for the same result to apply.
            # We first select the build anchor for the wanted day:
            date = datetime.datetime.today()
            if query == 'yesterday':
                # Use yesterday's date for testing purposes.
                date += datetime.timedelta(days=-1)

            return Base.dailyver(date)

        # In case the query isn't for daily
        selector = Definitions.SELECTORS[query]
        return Base._fetch_html(selector['URL']).xpath(selector['xpath'])

    @staticmethod
    def fullversion(version):
        """Get latest full version from Archive based on partial version.

        Raises IndexError when no archive entry starts with `version`.
        """
        versionlist = Base._fetch_html(Definitions.ARCHIVE).xpath(
            "//td/a[starts-with(text(), '" + version + "')]/text()")
        cleanlist = [entry.strip('/') for entry in versionlist]
        if not cleanlist:
            raise IndexError(
                "No version starting with '" + version + "' found in the archive.")

        # The highest version wins; components are compared numerically so
        # that e.g. 7.0.10.1 is considered newer than 7.0.9.1.
        return max(cleanlist, key=Base._version_key)

    @staticmethod
    def urlfromqueryandver(query, version):
        """Returns the fetching URL based on the queried version and the numeric version of it.

        The result is a dictionary with the keys 'x86' and 'x86_64'; '-'
        marks an architecture for which no URL is available.
        """
        # This has the purpose to simplify and explain how the releases are
        # laid out.

        # If the query tells about daily or 'yesterday' (for testing
        # purposes), we might ignore versions and return the value coming
        # from dailyurl:
        if query == 'daily':
            return Base.dailyurl()

        if query == 'yesterday':
            date = datetime.datetime.today() + datetime.timedelta(days=-1)
            return Base.dailyurl(date)

        # All other versions will be taken from Archive, as such we need a
        # full version. If the version splits into fewer than four parts by
        # '.', that's not a full version and we resolve it through
        # fullversion().
        fullversion = version
        if len(version.split('.')) < 4:
            fullversion = Base.fullversion(version)

        # So the final URL is the Archive one, plus the full version, plus a
        # final '/deb/' - and an arch subfolder.
        baseurl = Definitions.ARCHIVE + fullversion + '/deb/'
        retval = {}

        # x86 binaries are not offered anymore after 6.3.0.
        if parse_version(version) < parse_version('6.3.0'):
            retval['x86'] = baseurl + 'x86/'
        else:
            retval['x86'] = '-'

        retval['x86_64'] = baseurl + 'x86_64/'

        return retval


class Build(object):
    """A single build, resolved from a version number or a named query."""

    def __init__(self, query):
        """Resolve `query` into a version and its download URLs.

        After construction `self.query` holds the version the query resolved
        to, `self.version` the version used for the build and
        `self.basedirurl` the per-architecture URL dictionary returned by
        Base.urlfromqueryandver().

        Raises Exception when a named query does not resolve to exactly one
        version.
        """
        self.query = query
        self.version = ''
        self.basedirurl = {}

        # Remember what was asked for: the URL lookup below has to know
        # whether a daily build was requested, even after self.query has
        # been replaced by the version found.
        requested = query
        is_daily = requested in ('daily', 'yesterday')

        # To simplify the process, let's first see if there's a named query
        # (it shouldn't).
        if '.' not in self.query:
            # Named version. Let's get the result of the query.
            found = Base.namedver(self.query)

            if len(found) != 1:
                raise Exception("Queried version for build does not return a single result. Please unfold manually. Exiting.")

            # So if it is here, the version is one.
            self.query = found[0]

        if is_daily or len(self.query.split('.')) == 4:
            # Daily versions come from the daily build page and are used as
            # found, without being looked up in the release archive.
            self.version = self.query
        else:
            # If not 4 dotted, let's search for the 4 dotted version.
            self.version = Base.fullversion(self.query)

        self.basedirurl = Base.urlfromqueryandver(requested, self.version)