#!/usr/bin/env python3
#
# stats.py - part of the FDroid server tools
# Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import sys
import os
import re
import time
import traceback
import glob
import json
from argparse import ArgumentParser
import paramiko
import socket
import logging
import common
import metadata
import subprocess
from collections import Counter


def carbon_send(key, value):
    """Send one sample to the Carbon server named in the global config.

    The sample is written as a single text line of the form
    ``<key> <value> <unix-timestamp>\\n``, where the timestamp is the
    current time truncated to whole seconds.

    :param key: metric name, interpolated with ``%s``.
    :param value: metric value, interpolated with ``%d`` (so it must be
        numeric and any fractional part is dropped).
    :raises OSError: if connecting or sending fails; the socket is closed
        before the exception propagates.

    Reads ``config['carbon_host']`` and ``config['carbon_port']`` from the
    module-level ``config``, which must have been populated beforehand.
    """
    # Build the line before opening the socket so a formatting error cannot
    # leave a connection behind.
    msg = '%s %d %d\n' % (key, value, int(time.time()))

    # The context manager guarantees the socket is closed even when
    # connect() or sendall() raises.
    with socket.socket() as s:
        s.connect((config['carbon_host'], config['carbon_port']))
        # Python 3 sockets only accept bytes-like data; sending the str
        # directly raises TypeError.
        s.sendall(msg.encode('utf-8'))


options = None
config = None


def most_common_stable(counts):
    """Return the (item, count) pairs of a mapping in a deterministic order.

    Pairs are sorted by descending count; items with equal counts are
    ordered ascending by the item itself, so the result does not depend on
    the mapping's iteration order.

    :param counts: mapping of item -> numeric count (e.g. a Counter).
    :returns: list of ``(item, count)`` tuples.
    """
    return sorted(counts.items(), key=lambda t: (-t[1], t[0]))


def main():

    global options, config

    # Parse command line...
parser = ArgumentParser() common.setup_global_opts(parser) parser.add_argument("-d", "--download", action="store_true", default=False, help="Download logs we don't have") parser.add_argument("--recalc", action="store_true", default=False, help="Recalculate aggregate stats - use when changes " "have been made that would invalidate old cached data.") parser.add_argument("--nologs", action="store_true", default=False, help="Don't do anything logs-related") options = parser.parse_args() config = common.read_config(options) if not config['update_stats']: logging.info("Stats are disabled - set \"update_stats = True\" in your config.py") sys.exit(1) # Get all metadata-defined apps... allmetaapps = [app for app in metadata.read_metadata().itervalues()] metaapps = [app for app in allmetaapps if not app.Disabled] statsdir = 'stats' logsdir = os.path.join(statsdir, 'logs') datadir = os.path.join(statsdir, 'data') if not os.path.exists(statsdir): os.mkdir(statsdir) if not os.path.exists(logsdir): os.mkdir(logsdir) if not os.path.exists(datadir): os.mkdir(datadir) if options.download: # Get any access logs we don't have... 
ssh = None ftp = None try: logging.info('Retrieving logs') ssh = paramiko.SSHClient() ssh.load_system_host_keys() ssh.connect(config['stats_server'], username=config['stats_user'], timeout=10, key_filename=config['webserver_keyfile']) ftp = ssh.open_sftp() ftp.get_channel().settimeout(60) logging.info("...connected") ftp.chdir('logs') files = ftp.listdir() for f in files: if f.startswith('access-') and f.endswith('.log.gz'): destpath = os.path.join(logsdir, f) destsize = ftp.stat(f).st_size if (not os.path.exists(destpath) or os.path.getsize(destpath) != destsize): logging.debug("...retrieving " + f) ftp.get(f, destpath) except Exception: traceback.print_exc() sys.exit(1) finally: # Disconnect if ftp is not None: ftp.close() if ssh is not None: ssh.close() knownapks = common.KnownApks() unknownapks = [] if not options.nologs: # Process logs logging.info('Processing logs...') appscount = Counter() appsvercount = Counter() logexpr = '(?P[.:0-9a-fA-F]+) - - \[(?P