From 2162703a1ae8c7d9348362b5471d254fd53c998a Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Wed, 29 Nov 2017 12:40:04 +0100 Subject: [PATCH 1/5] mirror: new command to make a mirror of a repo This creates a mirror of a full repo by downloading all files listed in the index, and the ones that are generated based on that data, e.g. icons of different resolutions. This could be useful for setting up mirrors of small repositories, instead of having to learn and manage rsync or something else for mirroring. This just needs a working repo. It uses wget in a batch mode with the aim of being as efficient as possible. wget mirroring over HTTP is always going to be less efficient than rsync, but it shouldn't be so bad since it uses --continue to check whether it has already downloaded a file. I suppose it could be extended to use ETags for a little more efficiency. I developed this while creating a test mirror of f-droid.org, which is now a bit ironic, since I added a specific check to prevent people from using this on f-droid.org. 
--- completion/bash-completion | 7 ++ fdroid | 1 + fdroidserver/mirror.py | 141 +++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 fdroidserver/mirror.py diff --git a/completion/bash-completion b/completion/bash-completion index 8bdd333f..d18edd35 100644 --- a/completion/bash-completion +++ b/completion/bash-completion @@ -264,6 +264,12 @@ __complete_btlog() { __complete_options } +__complete_mirror() { + opts="-v" + lopts="--archive --output-dir" + __complete_options +} + __complete_nightly() { opts="-v -q" lopts="--show-secret-var" @@ -316,6 +322,7 @@ import \ init \ install \ lint \ +mirror \ nightly \ publish \ readmeta \ diff --git a/fdroid b/fdroid index a07a4ecf..f5e6c92b 100755 --- a/fdroid +++ b/fdroid @@ -48,6 +48,7 @@ commands = OrderedDict([ ("btlog", _("Update the binary transparency log for a URL")), ("signatures", _("Extract signatures from APKs")), ("nightly", _("Set up an app build for a nightly build repo")), + ("mirror", _("Download complete mirrors of small repos")), ]) diff --git a/fdroidserver/mirror.py b/fdroidserver/mirror.py new file mode 100644 index 00000000..3578f310 --- /dev/null +++ b/fdroidserver/mirror.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +import io +import ipaddress +import json +import logging +import os +import socket +import subprocess +import sys +import zipfile +from argparse import ArgumentParser +from urllib.parse import urlparse + +from . import _ +from . import common +from . import net +from . 
import update + +options = None + + +def main(): + global options + + parser = ArgumentParser(usage="%(prog)s [options] url") + common.setup_global_opts(parser) + parser.add_argument("url", nargs='?', help=_("Base URL to mirror")) + parser.add_argument("--archive", action='store_true', default=False, + help=_("Also mirror the full archive section")) + parser.add_argument("--output-dir", default=os.getcwd(), + help=_("The directory to write the mirror to")) + options = parser.parse_args() + + baseurl = options.url + basedir = options.output_dir + + url = urlparse(baseurl) + hostname = url.netloc + ip = None + try: + ip = ipaddress.ip_address(hostname) + except ValueError: + pass + if hostname == 'f-droid.org' \ + or (ip is not None and hostname in socket.gethostbyname_ex('f-droid.org')[2]): + print(_('ERROR: this command should never be used to mirror f-droid.org!\n' + 'A full mirror of f-droid.org requires more than 200GB.')) + sys.exit(1) + + path = url.path.rstrip('/') + if path.endswith('repo') or path.endswith('archive'): + logging.error(_('Do not include "{path}" in URL!').format(path=path.split('/')[-1])) + sys.exit(1) + elif not path.endswith('fdroid'): + logging.warning(_('{url} does not end with "fdroid", check the URL path!') + .format(url=baseurl)) + + icondirs = ['icons', ] + for density in update.screen_densities: + icondirs.append('icons-' + density) + + if options.archive: + sections = ('repo', 'archive') + else: + sections = ('repo', ) + + for section in sections: + sectionurl = baseurl + '/' + section + sectiondir = os.path.join(basedir, section) + repourl = sectionurl + '/index-v1.jar' + + content, etag = net.http_get(repourl) + with zipfile.ZipFile(io.BytesIO(content)) as zip: + jsoncontents = zip.open('index-v1.json').read() + + os.makedirs(sectiondir, exist_ok=True) + os.chdir(sectiondir) + for icondir in icondirs: + os.makedirs(os.path.join(sectiondir, icondir), exist_ok=True) + + data = json.loads(jsoncontents.decode('utf-8')) + urls = '' + 
for packageName, packageList in data['packages'].items(): + for package in packageList: + to_fetch = [] + for k in ('apkName', 'srcname'): + if k in package: + to_fetch.append(package[k]) + elif k == 'apkName': + logging.error(_('{appid} is missing {name}') + .format(appid=package['packageName'], name=k)) + for f in to_fetch: + if not os.path.exists(f) \ + or (f.endswith('.apk') and os.path.getsize(f) != package['size']): + url = sectionurl + '/' + f + urls += url + '\n' + urls += url + '.asc\n' + + for app in data['apps']: + localized = app.get('localized') + if localized: + for locale, d in localized.items(): + for k in update.GRAPHIC_NAMES: + f = d.get(k) + if f: + urls += '/'.join((sectionurl, locale, f)) + '\n' + for k in update.SCREENSHOT_DIRS: + filelist = d.get(k) + if filelist: + for f in filelist: + urls += '/'.join((sectionurl, locale, k, f)) + '\n' + + with open('.rsync-input-file', 'w') as fp: + fp.write(urls) + subprocess.call(['wget', '--continue', '--user-agent="fdroid mirror"', + '--input-file=.rsync-input-file']) + os.remove('.rsync-input-file') + + urls = dict() + for app in data['apps']: + if 'icon' not in app: + logging.error(_('no "icon" in {appid}').format(appid=app['packageName'])) + continue + icon = app['icon'] + for icondir in icondirs: + url = sectionurl + '/' + icondir + '/' + icon + if icondir not in urls: + urls[icondir] = '' + urls[icondir] += url + '\n' + + for icondir in icondirs: + os.chdir(os.path.join(basedir, section, icondir)) + with open('.rsync-input-file', 'w') as fp: + fp.write(urls[icondir]) + subprocess.call(['wget', '--continue', '--input-file=.rsync-input-file']) + os.remove('.rsync-input-file') + + +if __name__ == "__main__": + main() From d5481927f28bc50f8cdca9b767414e0affa70635 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Wed, 29 Nov 2017 14:59:48 +0100 Subject: [PATCH 2/5] mirror: show help if no URL is provided --- fdroidserver/mirror.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) 
diff --git a/fdroidserver/mirror.py b/fdroidserver/mirror.py index 3578f310..62408502 100644 --- a/fdroidserver/mirror.py +++ b/fdroidserver/mirror.py @@ -23,7 +23,7 @@ options = None def main(): global options - parser = ArgumentParser(usage="%(prog)s [options] url") + parser = ArgumentParser(usage=_("%(prog)s [options] url")) common.setup_global_opts(parser) parser.add_argument("url", nargs='?', help=_("Base URL to mirror")) parser.add_argument("--archive", action='store_true', default=False, @@ -32,6 +32,11 @@ def main(): help=_("The directory to write the mirror to")) options = parser.parse_args() + if options.url is None: + logging.error(_('A URL is required as an argument!') + '\n') + parser.print_help() + sys.exit(1) + baseurl = options.url basedir = options.output_dir From 94e67a76235c9d3e6ea0814d70b6470a21d93dd4 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Wed, 29 Nov 2017 16:54:27 +0100 Subject: [PATCH 3/5] mirror: verify index signature if fingerprint included in URL This keeps the old handling without signature checks so that it works without requiring Java/apksigner or the whole config.py parsing stuff. --- fdroidserver/mirror.py | 103 ++++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 38 deletions(-) diff --git a/fdroidserver/mirror.py b/fdroidserver/mirror.py index 62408502..1b7ce1a9 100644 --- a/fdroidserver/mirror.py +++ b/fdroidserver/mirror.py @@ -1,20 +1,18 @@ #!/usr/bin/env python3 -import io import ipaddress -import json import logging import os +import posixpath import socket import subprocess import sys -import zipfile from argparse import ArgumentParser -from urllib.parse import urlparse +import urllib.parse from . import _ from . import common -from . import net +from . import index from . 
import update options = None @@ -25,10 +23,12 @@ def main(): parser = ArgumentParser(usage=_("%(prog)s [options] url")) common.setup_global_opts(parser) - parser.add_argument("url", nargs='?', help=_("Base URL to mirror")) + parser.add_argument("url", nargs='?', + help=_('Base URL to mirror, can include the index signing key ' + + 'using the query string: ?fingerprint=')) parser.add_argument("--archive", action='store_true', default=False, help=_("Also mirror the full archive section")) - parser.add_argument("--output-dir", default=os.getcwd(), + parser.add_argument("--output-dir", default=None, help=_("The directory to write the mirror to")) options = parser.parse_args() @@ -37,11 +37,36 @@ def main(): parser.print_help() sys.exit(1) - baseurl = options.url - basedir = options.output_dir + scheme, hostname, path, params, query, fragment = urllib.parse.urlparse(options.url) + fingerprint = urllib.parse.parse_qs(query).get('fingerprint') + + def _append_to_url_path(*args): + '''Append the list of path components to URL, keeping the rest the same''' + newpath = posixpath.join(path, *args) + return urllib.parse.urlunparse((scheme, hostname, newpath, params, query, fragment)) + + if fingerprint: + config = common.read_config(options) + if not ('jarsigner' in config or 'apksigner' in config): + logging.error(_('Java JDK not found! Install in standard location or set java_paths!')) + sys.exit(1) + + def _get_index(section, etag=None): + url = _append_to_url_path(section) + return index.download_repo_index(url, etag=etag) + else: + def _get_index(section, etag=None): + import io + import json + import zipfile + from . 
import net + url = _append_to_url_path(section, 'index-v1.jar') + content, etag = net.http_get(url) + with zipfile.ZipFile(io.BytesIO(content)) as zip: + jsoncontents = zip.open('index-v1.json').read() + data = json.loads(jsoncontents.decode('utf-8')) + return data, etag - url = urlparse(baseurl) - hostname = url.netloc ip = None try: ip = ipaddress.ip_address(hostname) @@ -53,39 +78,40 @@ def main(): 'A full mirror of f-droid.org requires more than 200GB.')) sys.exit(1) - path = url.path.rstrip('/') + path = path.rstrip('/') if path.endswith('repo') or path.endswith('archive'): - logging.error(_('Do not include "{path}" in URL!').format(path=path.split('/')[-1])) - sys.exit(1) + logging.warning(_('Do not include "{path}" in URL!') + .format(path=path.split('/')[-1])) elif not path.endswith('fdroid'): logging.warning(_('{url} does not end with "fdroid", check the URL path!') - .format(url=baseurl)) + .format(url=options.url)) icondirs = ['icons', ] for density in update.screen_densities: icondirs.append('icons-' + density) + if options.output_dir: + basedir = options.output_dir + else: + basedir = os.path.join(os.getcwd(), hostname, path.strip('/')) + os.makedirs(basedir, exist_ok=True) + if options.archive: sections = ('repo', 'archive') else: sections = ('repo', ) for section in sections: - sectionurl = baseurl + '/' + section sectiondir = os.path.join(basedir, section) - repourl = sectionurl + '/index-v1.jar' - content, etag = net.http_get(repourl) - with zipfile.ZipFile(io.BytesIO(content)) as zip: - jsoncontents = zip.open('index-v1.json').read() + data, etag = _get_index(section) os.makedirs(sectiondir, exist_ok=True) os.chdir(sectiondir) for icondir in icondirs: os.makedirs(os.path.join(sectiondir, icondir), exist_ok=True) - data = json.loads(jsoncontents.decode('utf-8')) - urls = '' + urls = [] for packageName, packageList in data['packages'].items(): for package in packageList: to_fetch = [] @@ -98,9 +124,8 @@ def main(): for f in to_fetch: if not 
os.path.exists(f) \ or (f.endswith('.apk') and os.path.getsize(f) != package['size']): - url = sectionurl + '/' + f - urls += url + '\n' - urls += url + '.asc\n' + urls.append(_append_to_url_path(section, f)) + urls.append(_append_to_url_path(section, f + '.asc')) for app in data['apps']: localized = app.get('localized') @@ -109,18 +134,19 @@ def main(): for k in update.GRAPHIC_NAMES: f = d.get(k) if f: - urls += '/'.join((sectionurl, locale, f)) + '\n' + urls.append(_append_to_url_path(section, app['packageName'], locale, f)) for k in update.SCREENSHOT_DIRS: filelist = d.get(k) if filelist: for f in filelist: - urls += '/'.join((sectionurl, locale, k, f)) + '\n' + urls.append(_append_to_url_path(section, app['packageName'], locale, k, f)) - with open('.rsync-input-file', 'w') as fp: - fp.write(urls) + with open(urls_file, 'w') as fp: + for url in urls: + fp.write(url.split('?')[0] + '\n') # wget puts query string in the filename subprocess.call(['wget', '--continue', '--user-agent="fdroid mirror"', - '--input-file=.rsync-input-file']) - os.remove('.rsync-input-file') + '--input-file=' + urls_file]) + os.remove(urls_file) urls = dict() for app in data['apps']: @@ -129,17 +155,18 @@ def main(): continue icon = app['icon'] for icondir in icondirs: - url = sectionurl + '/' + icondir + '/' + icon + url = _append_to_url_path(section, icondir, icon) if icondir not in urls: - urls[icondir] = '' - urls[icondir] += url + '\n' + urls[icondir] = [] + urls[icondir].append(url) for icondir in icondirs: os.chdir(os.path.join(basedir, section, icondir)) - with open('.rsync-input-file', 'w') as fp: - fp.write(urls[icondir]) - subprocess.call(['wget', '--continue', '--input-file=.rsync-input-file']) - os.remove('.rsync-input-file') + with open(urls_file, 'w') as fp: + for url in urls[icondir]: + fp.write(url.split('?')[0] + '\n') # wget puts query string in the filename + subprocess.call(['wget', '--continue', '--input-file=' + urls_file]) + os.remove(urls_file) if __name__ == 
"__main__": From 79a4c469b30bd473093edb59d89146ad466786a3 Mon Sep 17 00:00:00 2001 From: Hans-Christoph Steiner Date: Wed, 29 Nov 2017 16:54:52 +0100 Subject: [PATCH 4/5] mirror: forward --verbose flag to wget to control output --- fdroidserver/mirror.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/fdroidserver/mirror.py b/fdroidserver/mirror.py index 1b7ce1a9..6ed69953 100644 --- a/fdroidserver/mirror.py +++ b/fdroidserver/mirror.py @@ -18,6 +18,22 @@ from . import update options = None +def _run_wget(path, urls): + if options.verbose: + verbose = '--verbose' + else: + verbose = '--no-verbose' + + os.chdir(path) + urls_file = '.fdroid-mirror-wget-input-file' + with open(urls_file, 'w') as fp: + for url in urls: + fp.write(url.split('?')[0] + '\n') # wget puts query string in the filename + subprocess.call(['wget', verbose, '--continue', '--user-agent="fdroid mirror"', + '--input-file=' + urls_file]) + os.remove(urls_file) + + def main(): global options @@ -141,12 +157,7 @@ def main(): for f in filelist: urls.append(_append_to_url_path(section, app['packageName'], locale, k, f)) - with open(urls_file, 'w') as fp: - for url in urls: - fp.write(url.split('?')[0] + '\n') # wget puts query string in the filename - subprocess.call(['wget', '--continue', '--user-agent="fdroid mirror"', - '--input-file=' + urls_file]) - os.remove(urls_file) + _run_wget(sectiondir, urls) urls = dict() for app in data['apps']: @@ -161,12 +172,7 @@ def main(): urls[icondir].append(url) for icondir in icondirs: - os.chdir(os.path.join(basedir, section, icondir)) - with open(urls_file, 'w') as fp: - for url in urls[icondir]: - fp.write(url.split('?')[0] + '\n') # wget puts query string in the filename - subprocess.call(['wget', '--continue', '--input-file=' + urls_file]) - os.remove(urls_file) + _run_wget(os.path.join(basedir, section, icondir), urls[icondir]) if __name__ == "__main__": From 09a6b37ac04b640a3f785ce5cb74baff9f36c2e7 Mon Sep 
17 00:00:00 2001 From: Hans-Christoph Steiner Date: Wed, 29 Nov 2017 17:12:38 +0100 Subject: [PATCH 5/5] mirror: fix downloading of graphics It was downloading them all directly into the 'repo' folder. --- fdroidserver/mirror.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fdroidserver/mirror.py b/fdroidserver/mirror.py index 6ed69953..06595a44 100644 --- a/fdroidserver/mirror.py +++ b/fdroidserver/mirror.py @@ -24,6 +24,10 @@ def _run_wget(path, urls): else: verbose = '--no-verbose' + if not urls: + return + logging.debug(_('Running wget in {path}').format(path=path)) + os.makedirs(path, exist_ok=True) os.chdir(path) urls_file = '.fdroid-mirror-wget-input-file' with open(urls_file, 'w') as fp: @@ -142,22 +146,27 @@ def main(): or (f.endswith('.apk') and os.path.getsize(f) != package['size']): urls.append(_append_to_url_path(section, f)) urls.append(_append_to_url_path(section, f + '.asc')) + _run_wget(sectiondir, urls) for app in data['apps']: localized = app.get('localized') if localized: for locale, d in localized.items(): + urls = [] + components = (section, app['packageName'], locale) for k in update.GRAPHIC_NAMES: f = d.get(k) if f: - urls.append(_append_to_url_path(section, app['packageName'], locale, f)) + urls.append(_append_to_url_path(*components, f)) + _run_wget(os.path.join(basedir, *components), urls) for k in update.SCREENSHOT_DIRS: + urls = [] filelist = d.get(k) if filelist: + components = (section, app['packageName'], locale, k) for f in filelist: - urls.append(_append_to_url_path(section, app['packageName'], locale, k, f)) - - _run_wget(sectiondir, urls) + urls.append(_append_to_url_path(*components, f)) + _run_wget(os.path.join(basedir, *components), urls) urls = dict() for app in data['apps']: