scanner: fix regex for matching URLs in gradle maven{} blocks

closes #465

This script generated gradle-maven-blocks.yaml:
```python
import os
import re
import yaml

pat = re.compile(r'\smaven\s*{[^}]+}')

finds = set()
for root, dirs, files in os.walk('.'):
    for f in files:
        if '.gradle' in f:
            with open(os.path.join(root, f), errors='surrogateescape') as fp:
                contents = fp.read()
            for m in pat.findall(contents):
                finds.add(m)

with open('finds.yaml', 'w') as fp:
    yaml.dump(sorted(finds), fp, default_flow_style=False)

```
This commit is contained in:
Hans-Christoph Steiner 2020-06-03 23:40:01 +02:00
parent 6590f3869e
commit 0837289935
3 changed files with 819 additions and 5 deletions

View file

@ -37,6 +37,9 @@ options = None
DEFAULT_JSON_PER_BUILD = {'errors': [], 'warnings': [], 'infos': []}
json_per_build = DEFAULT_JSON_PER_BUILD
MAVEN_URL_REGEX = re.compile(r"""\smaven\s*{.*?(?:setUrl|url)\s*=?\s*(?:uri)?\(?\s*["']?([^\s"']+)["']?[^}]*}""",
re.DOTALL)
def get_gradle_compile_commands(build):
compileCommands = ['compile',
@ -101,8 +104,6 @@ def scan_source(build_dir, build=metadata.Build()):
if r.match(s) and not is_whitelisted(s):
yield n
gradle_mavenrepo = re.compile(r'maven *{ *(url)? *[\'"]?([^ \'"]*)[\'"]?')
allowed_repos = [re.compile(r'^https://' + re.escape(repo) + r'/*') for repo in [
'repo1.maven.org/maven2', # mavenCentral()
'jcenter.bintray.com', # jcenter()
@ -278,9 +279,8 @@ def scan_source(build_dir, build=metadata.Build()):
count += handleproblem("usual suspect \'%s\'" % (name),
path_in_build_dir, filepath)
noncomment_lines = [line for line in lines if not common.gradle_comment.match(line)]
joined = re.sub(r'[\n\r\s]+', ' ', ' '.join(noncomment_lines))
for m in gradle_mavenrepo.finditer(joined):
url = m.group(2)
no_comments = re.sub(r'/\*.*?\*/', '', ''.join(noncomment_lines), flags=re.DOTALL)
for url in MAVEN_URL_REGEX.findall(no_comments):
if not any(r.match(url) for r in allowed_repos):
count += handleproblem('unknown maven repo \'%s\'' % url, path_in_build_dir, filepath)