Cache aggregate stats to save time

Ciaran Gultnieks 2014-04-29 15:33:20 +01:00
parent 63ce679a9d
commit 2a5c8a4aa2


@@ -23,6 +23,7 @@ import re
 import time
 import traceback
 import glob
+import json
 from optparse import OptionParser
 import paramiko
 import socket
@@ -53,6 +54,8 @@ def main():
                       help="Restrict output to warnings and errors")
     parser.add_option("-d", "--download", action="store_true", default=False,
                       help="Download logs we don't have")
+    parser.add_option("--recalc", action="store_true", default=False,
+                      help="Recalculate aggregate stats - use when changes have been made that would invalidate old cached data.")
     parser.add_option("--nologs", action="store_true", default=False,
                       help="Don't do anything logs-related")
     (options, args) = parser.parse_args()
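
The new flag defaults to off, so cached per-day aggregates are trusted by default; passing --recalc rebuilds every day's stats from the raw logs, which is needed whenever the parsing logic or known-APK data has changed in a way that makes previously cached totals stale.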
@@ -123,25 +126,55 @@ def main():
         logsearch = re.compile(logexpr).search
         for logfile in glob.glob(os.path.join(logsdir,'access-*.log.gz')):
             logging.debug('...' + logfile)
-            if options.verbose:
-                print '...' + logfile
-            p = subprocess.Popen(["zcat", logfile], stdout = subprocess.PIPE)
-            matches = (logsearch(line) for line in p.stdout)
-            for match in matches:
-                if match and match.group('statuscode') == '200':
-                    uri = match.group('uri')
-                    if uri.endswith('.apk'):
-                        _, apkname = os.path.split(uri)
-                        app = knownapks.getapp(apkname)
-                        if app:
-                            appid, _ = app
-                            appscount[appid] += 1
-                            # Strip the '.apk' from apkname
-                            appver = apkname[:-4]
-                            appsvercount[appver] += 1
-                        else:
-                            if not apkname in unknownapks:
-                                unknownapks.append(apkname)
+
+            # Get the date for this log - e.g. 2012-02-28
+            thisdate = os.path.basename(logfile)[7:-7]
+
+            agg_path = os.path.join(datadir, thisdate + '.json')
+            if not options.recalc and os.path.exists(agg_path):
+                # Use previously calculated aggregate data
+                with open(agg_path, 'r') as f:
+                    today = json.load(f)
+
+            else:
+                # Calculate from logs...
+
+                today = {
+                    'apps': Counter(),
+                    'appsver': Counter(),
+                    'unknown': []
+                    }
+
+                p = subprocess.Popen(["zcat", logfile], stdout = subprocess.PIPE)
+                matches = (logsearch(line) for line in p.stdout)
+                for match in matches:
+                    if match and match.group('statuscode') == '200':
+                        uri = match.group('uri')
+                        if uri.endswith('.apk'):
+                            _, apkname = os.path.split(uri)
+                            app = knownapks.getapp(apkname)
+                            if app:
+                                appid, _ = app
+                                today['apps'][appid] += 1
+                                # Strip the '.apk' from apkname
+                                appver = apkname[:-4]
+                                today['appsver'][appver] += 1
+                            else:
+                                if not apkname in today['unknown']:
+                                    today['unknown'].append(apkname)
+
+                # Save calculated aggregate data for today to cache
+                with open(agg_path, 'w') as f:
+                    json.dump(today, f)
+
+            # Add today's stats (whether cached or recalculated) to the total
+            for appid in today['apps']:
+                appscount[appid] += today['apps'][appid]
+            for appid in today['appsver']:
+                appsvercount[appid] += today['appsver'][appid]
+            for uk in today['unknown']:
+                if not uk in unknownapks:
+                    unknownapks.append(uk)
 
         # Calculate and write stats for total downloads...
         lst = []
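
The final hunk is the whole caching scheme: one JSON file per day, named after the date, stored alongside the other stats data. Below is a minimal standalone sketch of that pattern; the function name load_day_stats, the aggregate callback, and the demo values are invented for illustration, not taken from the commit. One subtlety: collections.Counter serializes to JSON as a plain object, so a cache hit hands back ordinary dicts rather than Counters, which is harmless here because cached days are only ever read into the running totals, never incremented again.

    import json
    import os
    from collections import Counter

    def load_day_stats(agg_path, aggregate, recalc=False):
        """Return one day's download stats, reusing the JSON cache when allowed.

        `aggregate` is a callable that rebuilds the day's stats from the
        raw logs (the expensive zcat-and-regex pass in the hunk above).
        """
        if not recalc and os.path.exists(agg_path):
            # Cache hit: json.load yields plain dicts/lists, not Counters,
            # which is harmless since cached days are only ever read.
            with open(agg_path, 'r') as f:
                return json.load(f)
        today = aggregate()
        # Counter is a dict subclass, so json.dump serializes it directly.
        with open(agg_path, 'w') as f:
            json.dump(today, f)
        return today

    def demo_aggregate():
        # Stand-in for the log-parsing loop; the values are invented.
        return {'apps': Counter({'org.example.app': 3}),
                'appsver': Counter({'org.example.app_7': 3}),
                'unknown': []}

    # The [7:-7] slice in the hunk recovers the date from a log file name,
    # since 'access-' and '.log.gz' are 7 characters each:
    assert os.path.basename('access-2012-02-28.log.gz')[7:-7] == '2012-02-28'

    today = load_day_stats('2012-02-28.json', demo_aggregate)  # first run: writes cache
    again = load_day_stats('2012-02-28.json', demo_aggregate)  # second run: reads cache
    assert today['apps']['org.example.app'] == again['apps']['org.example.app'] == 3

Keying the cache by date means past days never need invalidating in normal operation; --recalc exists for the one case where the parsing rules or known-APK data change out from under the cache.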