diff --git a/b2sdk/_internal/scan/folder.py b/b2sdk/_internal/scan/folder.py index 54b3ca2fb..254a5f55b 100644 --- a/b2sdk/_internal/scan/folder.py +++ b/b2sdk/_internal/scan/folder.py @@ -11,8 +11,6 @@ import logging import os -import platform -import re import stat import sys from abc import ABCMeta, abstractmethod @@ -20,6 +18,7 @@ from typing import Iterator from ..utils import fix_windows_path_limit, get_file_mtime, validate_b2_file_name +from ..utils.filesystem import validate_b2_file_name_as_path from .exception import ( EmptyDirectory, EnvironmentEncodingError, @@ -31,21 +30,6 @@ from .policies import DEFAULT_SCAN_MANAGER, ScanPoliciesManager from .report import ProgressReport -DRIVE_MATCHER = re.compile(r'^([A-Za-z]):([/\\])') -ABSOLUTE_PATH_MATCHER = re.compile(r'^(/)|^(\\)') -RELATIVE_PATH_MATCHER = re.compile( - # "abc" and "xyz" represent anything, including "nothing" - r'^(\.\.[/\\])|' # ../abc or ..\abc - + r'^(\.[/\\])|' # ./abc or .\abc - + r'([/\\]\.\.[/\\])|' # abc/../xyz or abc\..\xyz or abc\../xyz or abc/..\xyz - + r'([/\\]\.[/\\])|' # abc/./xyz or abc\.\xyz or abc\./xyz or abc/.\xyz - + r'([/\\]\.\.)$|' # abc/.. or abc\.. - + r'([/\\]\.)$|' # abc/. or abc\. - + r'^(\.\.)$|' # just ".." - + r'([/\\][/\\])|' # abc\/xyz or abc/\xyz or abc//xyz or abc\\xyz - + r'^(\.)$' # just "." 
def _validate_file_name(self, file_name):
    """
    Reject file names that cannot be safely mapped onto a local path.

    The check itself is delegated to ``validate_b2_file_name_as_path``; its
    ``ValueError`` is converted into ``UnsupportedFilename``, which carries
    the validator's message together with the offending file name.

    :param file_name: file name to check
    :raises UnsupportedFilename: if the validator rejects the name
    """
    try:
        validate_b2_file_name_as_path(file_name)
    except ValueError as validation_error:
        reason = str(validation_error)
        raise UnsupportedFilename(reason, file_name) from validation_error
def validate_b2_file_name_as_path(file_name: str) -> None:
    """
    Ensure a B2 file name is safe to interpret as a local path.

    The checks run in this order and the first failing one decides the message:

    1. relative path components, as matched by ``RELATIVE_PATH_MATCHER``;
    2. a leading slash or backslash, as matched by ``ABSOLUTE_PATH_MATCHER``;
    3. on Windows only, a leading drive specifier (a letter followed by a colon).

    :param file_name: B2 file name to check
    :raises ValueError: if the name fails any of the checks above
    """
    if RELATIVE_PATH_MATCHER.search(file_name):
        raise ValueError('File names containing relative path components are not supported')

    if ABSOLUTE_PATH_MATCHER.search(file_name):
        raise ValueError('File names containing absolute path components are not supported')

    # On Windows a drive specifier makes the name escape the folder it is joined
    # to whether or not a separator follows the colon: "C:/x" is absolute, while
    # "C:x" is relative to the current directory of drive C. DRIVE_MATCHER only
    # recognises the first form, so test for the bare "X:" prefix, which covers both.
    if _IS_WINDOWS and re.match(r'[A-Za-z]:', file_name):
        raise ValueError('File names containing Windows drive letters are not supported')
class TestValidateB2FileNameAsPath:
    """Unit tests for ``validate_b2_file_name_as_path``."""

    @pytest.mark.parametrize(
        'file_name',
        [
            'file.txt',
            'dir/file.txt',
            'dir\\file.txt',
            # dots that belong to a name rather than forming a path component
            '.hidden',
            'dir/.hidden',
            'dir/..file',
            'file..txt',
        ],
    )
    def test_valid_file_name(self, file_name):
        # Passing means "no exception raised"; the validator returns nothing.
        validate_b2_file_name_as_path(file_name)

    @pytest.mark.parametrize(
        ('file_name', 'reason_pattern'),
        [
            # leading relative component
            ('../file.txt', r'relative path'),
            ('./file.txt', r'relative path'),
            ('..\\file.txt', r'relative path'),
            # relative component in the middle
            ('dir/../file.txt', r'relative path'),
            ('dir/./file.txt', r'relative path'),
            ('dir\\..\\file.txt', r'relative path'),
            # trailing relative component
            ('dir/..', r'relative path'),
            ('dir/.', r'relative path'),
            # the whole name is a relative component
            ('..', r'relative path'),
            ('.', r'relative path'),
            # doubled separators are reported as relative components too
            ('dir//file.txt', r'relative path'),
            ('dir\\\\file.txt', r'relative path'),
            # leading separator
            ('/file.txt', r'absolute path'),
            ('\\file.txt', r'absolute path'),
        ],
    )
    def test_invalid_file_name(self, file_name, reason_pattern):
        with pytest.raises(ValueError, match=reason_pattern):
            validate_b2_file_name_as_path(file_name)

    @pytest.mark.skipif(
        platform.system() != 'Windows',
        reason='drive letters in paths are only forbidden on Windows',
    )
    def test_drive_letter_disallowed_on_windows(self):
        with pytest.raises(
            ValueError,
            match=r'drive letters',
        ):
            validate_b2_file_name_as_path(r'C:\file.txt')

    @pytest.mark.skipif(
        platform.system() == 'Windows',
        reason='drive letters in paths are only forbidden on Windows',
    )
    def test_drive_letter_allowed_outside_windows(self):
        # Outside Windows the drive-letter check is skipped, so the name is accepted.
        validate_b2_file_name_as_path(r'C:\file.txt')