Skip to content

Commit

Permalink
Fixup info plugin's exclusion logic
Browse files Browse the repository at this point in the history
Changed slash removal from resolved patterns
Added CWD to platform debug file
Added excluded paths to platform debug file
Added venv exclusion based on the presence of a pyvenv.cfg file
Changed the regex matching function from match to search to support subprojects
Set explicit dotfile name patterns
Reverted build assets gathering dc808ca
Reverted info.gitignore file 79129d5
  • Loading branch information
kamilkrzyskow committed Mar 9, 2024
1 parent fdeafbb commit d28901e
Show file tree
Hide file tree
Showing 7 changed files with 209 additions and 141 deletions.
37 changes: 0 additions & 37 deletions material/plugins/info/info.gitignore

This file was deleted.

56 changes: 56 additions & 0 deletions material/plugins/info/patterns.py
@@ -0,0 +1,56 @@
def get_exclusion_patterns():
    """
    Return a fresh list of regex patterns for paths left out of the archive.

    The patterns are compared case-sensitively against directory and file
    names. Matching is done with the search function of the
    https://pypi.org/project/regex/ module, which works like
    https://docs.python.org/3/library/re.html#re.search and scans the whole
    string, so a pattern may match anywhere in the path.

    Remarks for writing new patterns:
    - The compared paths are always in POSIX format.
    - Every directory path ends with a /, which tells it apart from a file.
    - Patterns for dynamic or custom paths like Virtual Environments (venv)
      or build site directories are created during plugin runtime.
    """

    # Python cache directory
    python_cache = [r"/__pycache__/"]

    # NodeJS modules
    node_modules = [r"/node_modules/"]

    # macOS
    macos = [r"/\.DS_Store$"]

    # Common IDE directories
    ide_directories = [
        r"/\.vscode/",
        r"/\.vs/",
        r"/\.idea/",
    ]

    # Git repository
    git_repository = [r"/\.git/"]

    # Generated files and folders
    generated = [r"/[^/]+\.zip$"]

    # .cache files and folders
    caches = [r"/[^/]*\.cache($|/)"]

    # Common developer .dotfiles
    dotfiles = [
        r"/\.gitignore$",
        r"/\.gitattributes$",
        r"/\.gitmodules$",
        r"/\.editorconfig$",
        r"/\.dockerignore$",
        r"/\.eslintignore$",
        r"/\.eslintrc$",
        r"/\.stylelintignore$",
        r"/\.stylelintrc$",
        r"/\.browserslistrc$",
        r"/\.babelrc$",
        r"/\.codecov\.yml$",
        r"/\.coveragerc$",
        r"/\.pre-commit-config\.yaml$",
        r"/\.pyspelling\.yml$",
        r"/\.flake8$",
    ]

    # A new list is built on every call, so callers can extend their copy
    return [
        *python_cache,
        *node_modules,
        *macos,
        *ide_directories,
        *git_repository,
        *generated,
        *caches,
        *dotfiles,
    ]
81 changes: 48 additions & 33 deletions material/plugins/info/plugin.py
Expand Up @@ -39,6 +39,7 @@
from zipfile import ZipFile, ZIP_DEFLATED

from .config import InfoConfig
from .patterns import get_exclusion_patterns

# -----------------------------------------------------------------------------
# Classes
Expand All @@ -56,6 +57,7 @@ def __init__(self, *args, **kwargs):

# Initialize empty members
self.exclusion_patterns = []
self.excluded_entries = []

# Determine whether we're serving the site
def on_startup(self, *, command, dirty):
Expand Down Expand Up @@ -183,8 +185,9 @@ def on_config(self, config):
example, _ = os.path.splitext(example)
example = "-".join([present, slugify(example, "-")])

# Load exclusion patterns
self.exclusion_patterns = _load_exclusion_patterns()
# Get local copy of the exclusion patterns
self.exclusion_patterns = get_exclusion_patterns()
self.excluded_entries = []

# Exclude the site_dir at project root
if config.site_dir.startswith(os.getcwd()):
Expand Down Expand Up @@ -217,18 +220,7 @@ def on_config(self, config):
path = os.path.join(abs_root, name)

# Exclude the directory and all subdirectories
if self._is_excluded(_resolve_pattern(path)):
dirnames.remove(name)
continue

# Projects, which don't use the projects plugin for
# multi-language support could have separate build folders
# for each config file or language. Therefore, we exclude
# them with the assumption a site_dir contains the sitemap
# file. Example of such a setup: https://t.ly/DLQcy
sitemap_gz = os.path.join(path, "sitemap.xml.gz")
if os.path.exists(sitemap_gz):
log.debug(f"Excluded site_dir: {path}")
if self._is_excluded(path):
dirnames.remove(name)

# Write files to the in-memory archive
Expand All @@ -237,7 +229,7 @@ def on_config(self, config):
path = os.path.join(abs_root, name)

# Exclude the file
if self._is_excluded(_resolve_pattern(path)):
if self._is_excluded(path):
continue

# Resolve the relative path to create a matching structure
Expand All @@ -261,11 +253,14 @@ def on_config(self, config):
"system": platform.platform(),
"architecture": platform.architecture(),
"python": platform.python_version(),
"cwd": os.getcwd(),
"command": " ".join([
sys.argv[0].rsplit(os.sep, 1)[-1],
*sys.argv[1:]
]),
"sys.path": sys.path
"env:$PYTHONPATH": os.getenv("PYTHONPATH", ""),
"sys.path": sys.path,
"excluded_entries": self.excluded_entries
},
default = str,
indent = 2
Expand Down Expand Up @@ -374,13 +369,43 @@ def _help_on_not_in_cwd(self, outside_root):
if self.config.archive_stop_on_violation:
sys.exit(1)

# Check if a path is excluded and should be omitted from the zip file. Pattern
# matching is used for files and folders alike, folders are additionally
# checked for specific marker files (sitemap.xml.gz, pyvenv.cfg) in order to
# skip build and venv directories. Side effect: every excluded path is added
# to self.excluded_entries, so that it can be saved in the zip file.
#
# The abspath parameter is the absolute path of the file or folder. Returns
# True if the entry must be skipped, False otherwise.
def _is_excluded(self, abspath: str) -> bool:

    # Resolve the path into POSIX format to match the patterns
    pattern_path = _resolve_pattern(abspath)

    # Patterns are searched, not matched from the start, so that they also
    # apply to paths nested in subprojects
    for pattern in self.exclusion_patterns:
        if regex.search(pattern, pattern_path):
            log.debug(f"Excluded pattern '{pattern}': {abspath}")
            self.excluded_entries.append(f"{pattern} - {pattern_path}")
            return True

    # File exclusion should be limited to pattern matching
    if os.path.isfile(abspath):
        return False

    # Projects, which don't use the projects plugin for multi-language
    # support could have separate build folders for each config file or
    # language. Therefore, we exclude them with the assumption a site_dir
    # contains the sitemap file. Example of such a setup: https://t.ly/DLQcy
    sitemap_gz = os.path.join(abspath, "sitemap.xml.gz")
    if os.path.exists(sitemap_gz):
        log.debug(f"Excluded site_dir: {abspath}")
        self.excluded_entries.append(f"sitemap.xml.gz - {pattern_path}")
        return True

    # People can use multiple Virtual Environments, so we have to handle
    # both active and inactive venvs. All systems share a pyvenv.cfg file in
    # the root of the venv directory. Example: https://t.ly/cIxqD
    venv_cfg = os.path.join(abspath, "pyvenv.cfg")
    if os.path.exists(venv_cfg):
        log.debug(f"Excluded venv: {abspath}")
        self.excluded_entries.append(f"pyvenv.cfg - {pattern_path}")
        return True

    return False

# -----------------------------------------------------------------------------
Expand Down Expand Up @@ -435,29 +460,19 @@ def _load_yaml(abs_src_path: str):

return result

# Load info.gitignore, ignore any empty lines or # comments
def _load_exclusion_patterns(path: str = None):
if path is None:
path = os.path.dirname(os.path.abspath(__file__))
path = os.path.join(path, "info.gitignore")

with open(path, encoding = "utf-8") as file:
lines = map(str.strip, file.readlines())

return [line for line in lines if line and not line.startswith("#")]

# Get a normalized POSIX path for the pattern matching with removed current
# working directory prefix. Directory paths end with a '/' to allow more control
# in the pattern creation for files and directories.
def _resolve_pattern(abspath: str):
path = abspath.replace(os.getcwd(), "", 1).replace(os.sep, "/")
path = abspath.replace(os.getcwd(), "", 1)
path = path.replace(os.sep, "/").rstrip("/")

if not path:
return "/"

# Check abspath, as the file needs to exist
if not os.path.isfile(abspath):
return path.rstrip("/") + "/"
return path + "/"

return path

Expand Down
37 changes: 0 additions & 37 deletions src/plugins/info/info.gitignore

This file was deleted.

56 changes: 56 additions & 0 deletions src/plugins/info/patterns.py
@@ -0,0 +1,56 @@
def get_exclusion_patterns():
    """
    Return a fresh list of regex patterns for paths left out of the archive.

    The patterns are compared case-sensitively against directory and file
    names. Matching is done with the search function of the
    https://pypi.org/project/regex/ module, which works like
    https://docs.python.org/3/library/re.html#re.search and scans the whole
    string, so a pattern may match anywhere in the path.

    Remarks for writing new patterns:
    - The compared paths are always in POSIX format.
    - Every directory path ends with a /, which tells it apart from a file.
    - Patterns for dynamic or custom paths like Virtual Environments (venv)
      or build site directories are created during plugin runtime.
    """

    # Python cache directory
    python_cache = [r"/__pycache__/"]

    # NodeJS modules
    node_modules = [r"/node_modules/"]

    # macOS
    macos = [r"/\.DS_Store$"]

    # Common IDE directories
    ide_directories = [
        r"/\.vscode/",
        r"/\.vs/",
        r"/\.idea/",
    ]

    # Git repository
    git_repository = [r"/\.git/"]

    # Generated files and folders
    generated = [r"/[^/]+\.zip$"]

    # .cache files and folders
    caches = [r"/[^/]*\.cache($|/)"]

    # Common developer .dotfiles
    dotfiles = [
        r"/\.gitignore$",
        r"/\.gitattributes$",
        r"/\.gitmodules$",
        r"/\.editorconfig$",
        r"/\.dockerignore$",
        r"/\.eslintignore$",
        r"/\.eslintrc$",
        r"/\.stylelintignore$",
        r"/\.stylelintrc$",
        r"/\.browserslistrc$",
        r"/\.babelrc$",
        r"/\.codecov\.yml$",
        r"/\.coveragerc$",
        r"/\.pre-commit-config\.yaml$",
        r"/\.pyspelling\.yml$",
        r"/\.flake8$",
    ]

    # A new list is built on every call, so callers can extend their copy
    return [
        *python_cache,
        *node_modules,
        *macos,
        *ide_directories,
        *git_repository,
        *generated,
        *caches,
        *dotfiles,
    ]

0 comments on commit d28901e

Please sign in to comment.