scanner: expose "usual suspects" patterns for use in an API

This commit is contained in:
Hans-Christoph Steiner 2020-10-01 10:22:32 +02:00
parent 3c64996089
commit 05cd8c6810

View file

@ -40,26 +40,7 @@ json_per_build = DEFAULT_JSON_PER_BUILD
MAVEN_URL_REGEX = re.compile(r"""\smaven\s*{.*?(?:setUrl|url)\s*=?\s*(?:uri)?\(?\s*["']?([^\s"']+)["']?[^}]*}""", MAVEN_URL_REGEX = re.compile(r"""\smaven\s*{.*?(?:setUrl|url)\s*=?\s*(?:uri)?\(?\s*["']?([^\s"']+)["']?[^}]*}""",
re.DOTALL) re.DOTALL)
CODE_SIGNATURES = {
def get_gradle_compile_commands(build):
    """Return regexes matching Gradle dependency-configuration keywords.

    Each candidate keyword is the concatenation of a flavor prefix, a
    build type ('' or 'release') and a base configuration name such as
    'compile' or 'implementation'.  The pieces are joined verbatim, so
    the result is e.g. 'releaseimplementation'; matching relies on
    re.IGNORECASE to also hit Gradle's camelCase spelling.

    build -- object with a ``gradle`` attribute.  When it is truthy and
             not equal to ['yes'], its items are used as additional
             flavor prefixes; otherwise only the empty prefix is used.

    Returns a list of compiled patterns, each of the form
    r'\\s*' + keyword, ordered by flavor, then build type, then
    configuration name.
    """
    compile_commands = [
        'compile',
        'provided',
        'apk',
        'implementation',
        'api',
        'compileOnly',
        'runtimeOnly',
    ]
    build_types = ['', 'release']
    # The empty flavor is always present so unflavored keywords match.
    flavors = ['']
    if build.gradle and build.gradle != ['yes']:
        # Extends the local list only; build.gradle itself is not modified.
        flavors += build.gradle

    commands = [
        ''.join(parts)
        for parts in itertools.product(flavors, build_types, compile_commands)
    ]
    return [re.compile(r'\s*' + c, re.IGNORECASE) for c in commands]
def scan_binary(apkfile):
usual_suspects = {
# The `apkanalyzer dex packages` output looks like this: # The `apkanalyzer dex packages` output looks like this:
# M d 1 1 93 <packagename> <other stuff> # M d 1 1 93 <packagename> <other stuff>
# The first column has P/C/M/F for package, class, method or field # The first column has P/C/M/F for package, class, method or field
@ -73,30 +54,10 @@ def scan_binary(apkfile):
r'(com\.google\.analytics[^\s]*)', r'(com\.google\.analytics[^\s]*)',
r'(com\.android\.billing[^\s]*)', r'(com\.android\.billing[^\s]*)',
] ]
} }
logging.info("Scanning APK for known non-free classes.")
result = common.SdkToolsPopen(["apkanalyzer", "dex", "packages", "--defined-only", apkfile], output=False)
problems = 0
for suspect, regexp in usual_suspects.items():
matches = regexp.findall(result.output)
if matches:
for m in set(matches):
logging.debug("Found class '%s'" % m)
problems += 1
if problems:
logging.critical("Found problems in %s" % apkfile)
return problems
# Common known non-free blobs (always lower case):
def scan_source(build_dir, build=metadata.Build()): NON_FREE_GRADLE_LINES = {
"""Scan the source code in the given directory (and all subdirectories)
and return the number of fatal problems encountered
"""
count = 0
# Common known non-free blobs (always lower case):
usual_suspects = {
exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [ exp: re.compile(r'.*' + exp, re.IGNORECASE) for exp in [
r'flurryagent', r'flurryagent',
r'paypal.*mpl', r'paypal.*mpl',
@ -118,7 +79,47 @@ def scan_source(build_dir, build=metadata.Build()):
r'com.tencent.bugly', r'com.tencent.bugly',
r'appcenter-push', r'appcenter-push',
] ]
} }
def get_gradle_compile_commands(build):
    """Return regexes matching Gradle dependency-configuration keywords.

    Each candidate keyword is the concatenation of a flavor prefix, a
    build type ('' or 'release') and a base configuration name such as
    'compile' or 'implementation'.  The pieces are joined verbatim, so
    the result is e.g. 'releaseimplementation'; matching relies on
    re.IGNORECASE to also hit Gradle's camelCase spelling.

    build -- object with a ``gradle`` attribute.  When it is truthy and
             not equal to ['yes'], its items are used as additional
             flavor prefixes; otherwise only the empty prefix is used.

    Returns a list of compiled patterns, each of the form
    r'\\s*' + keyword, ordered by flavor, then build type, then
    configuration name.
    """
    compile_commands = [
        'compile',
        'provided',
        'apk',
        'implementation',
        'api',
        'compileOnly',
        'runtimeOnly',
    ]
    build_types = ['', 'release']
    # The empty flavor is always present so unflavored keywords match.
    flavors = ['']
    if build.gradle and build.gradle != ['yes']:
        # Extends the local list only; build.gradle itself is not modified.
        flavors += build.gradle

    commands = [
        ''.join(parts)
        for parts in itertools.product(flavors, build_types, compile_commands)
    ]
    return [re.compile(r'\s*' + c, re.IGNORECASE) for c in commands]
def scan_binary(apkfile):
    """Scan the dex contents of an APK for known non-free class signatures.

    Runs ``apkanalyzer dex packages --defined-only`` on apkfile via
    common.SdkToolsPopen and applies every compiled regex stored in
    CODE_SIGNATURES to the captured output.  Each distinct matching
    class name is logged at debug level.

    Returns the number of problems found (0 when nothing matched); a
    critical message is logged when that number is non-zero.
    """
    logging.info("Scanning APK for known non-free classes.")
    result = common.SdkToolsPopen(
        ["apkanalyzer", "dex", "packages", "--defined-only", apkfile],
        output=False,
    )
    problems = 0
    # Only the compiled patterns are needed; the dict keys are unused here.
    for regexp in CODE_SIGNATURES.values():
        matches = regexp.findall(result.output)
        if matches:
            # De-duplicate so each offending class is reported once.
            for m in set(matches):
                logging.debug("Found class '%s'", m)
            # NOTE(review): counted once per matching signature, not once
            # per matched class -- confirm against the upstream file, since
            # the nesting of this line is ambiguous in the flattened source.
            problems += 1
    if problems:
        logging.critical("Found problems in %s", apkfile)
    return problems
def scan_source(build_dir, build=metadata.Build()):
"""Scan the source code in the given directory (and all subdirectories)
and return the number of fatal problems encountered
"""
count = 0
whitelisted = [ whitelisted = [
'firebase-jobdispatcher', # https://github.com/firebase/firebase-jobdispatcher-android/blob/master/LICENSE 'firebase-jobdispatcher', # https://github.com/firebase/firebase-jobdispatcher-android/blob/master/LICENSE
@ -130,7 +131,7 @@ def scan_source(build_dir, build=metadata.Build()):
return any(wl in s for wl in whitelisted) return any(wl in s for wl in whitelisted)
def suspects_found(s): def suspects_found(s):
for n, r in usual_suspects.items(): for n, r in NON_FREE_GRADLE_LINES.items():
if r.match(s) and not is_whitelisted(s): if r.match(s) and not is_whitelisted(s):
yield n yield n