Unverified Commit 6733b09b by Johannes vom Dorp Committed by GitHub

Merge pull request #2 from fkie-cad/add_safe_rglob

Add safe rglob
parents aa2fc1e5 fb1eb4f3
# general things to ignore # general things to ignore
.project
.pydevproject
.coverage
build/
dist/
*.egg-info/
*.egg *.egg
*.egg-info/
*.py[cod] *.py[cod]
__pycache__/
*.so *.so
*~ *~
.cache .cache
.coverage
.idea
.project
.pydevproject
__pycache__/
build/
dist/
from .fail_safe_file_operations import get_binary_from_file, get_string_list_from_file, write_binary_to_file, get_safe_name, delete_file, get_files_in_dir, get_dirs_in_dir, create_symlink, get_dir_of_file from .fail_safe_file_operations import (
get_binary_from_file, get_string_list_from_file, write_binary_to_file, get_safe_name, delete_file, get_files_in_dir,
get_dirs_in_dir, create_symlink, get_dir_of_file, safe_rglob
)
from .file_functions import read_in_chunks, get_directory_for_filename, create_dir_for_file, human_readable_file_size from .file_functions import read_in_chunks, get_directory_for_filename, create_dir_for_file, human_readable_file_size
from .git_functions import get_version_string_from_git from .git_functions import get_version_string_from_git
__all__ = [ __all__ = [
'get_directory_for_filename',
'create_dir_for_file', 'create_dir_for_file',
'human_readable_file_size', 'create_symlink',
'read_in_chunks', 'delete_file',
'get_version_string_from_git',
'get_binary_from_file', 'get_binary_from_file',
'get_dir_of_file',
'get_directory_for_filename',
'get_dirs_in_dir',
'get_files_in_dir',
'get_safe_name',
'get_string_list_from_file', 'get_string_list_from_file',
'get_version_string_from_git',
'human_readable_file_size',
'read_in_chunks',
'safe_rglob',
'write_binary_to_file', 'write_binary_to_file',
'get_safe_name',
'delete_file',
'create_symlink',
'get_files_in_dir',
'get_dirs_in_dir',
'get_dir_of_file'
] ]
...@@ -2,6 +2,8 @@ import logging ...@@ -2,6 +2,8 @@ import logging
import os import os
import re import re
import sys import sys
from pathlib import Path
from typing import Iterable
from .file_functions import create_dir_for_file from .file_functions import create_dir_for_file
...@@ -194,8 +196,8 @@ def get_dir_of_file(file_path): ...@@ -194,8 +196,8 @@ def get_dir_of_file(file_path):
''' '''
Returns absolute path of the directory including file Returns absolute path of the directory including file
:param file_path: Paht of the file :param file_path: Path of the file
:type: paht-like object :type: path-like object
:return: string :return: string
''' '''
try: try:
...@@ -203,3 +205,33 @@ def get_dir_of_file(file_path): ...@@ -203,3 +205,33 @@ def get_dir_of_file(file_path):
except Exception as e: except Exception as e:
logging.error('Could not get directory path: {} {}'.format(sys.exc_info()[0].__name__, e)) logging.error('Could not get directory path: {} {}'.format(sys.exc_info()[0].__name__, e))
return '/' return '/'
def safe_rglob(path: Path, include_symlinks: bool = True, include_directories: bool = True) -> Iterable[Path]:
    '''
    Fail-safe alternative to pathlib's ``Path.rglob``.
    ``Path.rglob`` tries to follow symlinks and crashes if it encounters certain broken ones.
    This generator never descends into symlinks and logs file system errors instead of raising them.

    :param path: Root directory of the search. The root itself is never yielded.
                 Nothing is yielded if it is a symlink, not a directory or not accessible.
    :type path: pathlib.Path
    :param include_symlinks: Yield symlinks that resolve to an existing file or directory
    :type include_symlinks: bool
    :param include_directories: Yield (non-symlink) directories in addition to files
    :type include_directories: bool
    :return: generator of pathlib.Path objects
    '''
    try:
        if path.is_symlink() or not path.is_dir():
            return
        # The listing is read inside the try block so that an unreadable or vanished root directory
        # is logged like any nested directory instead of raising into the caller.
        child_paths = list(path.iterdir())
    except PermissionError:
        logging.error('Permission Error: could not access path {path}'.format(path=path.absolute()))
        return
    except OSError as error:
        logging.warning('Could not read directory {path}: {error}'.format(path=path.absolute(), error=error))
        return
    for child_path in child_paths:
        yield from _iterate_path_recursively(child_path, include_symlinks, include_directories)
def _iterate_path_recursively(path: Path, include_symlinks: bool = True, include_directories: bool = True):
    '''
    Yield ``path`` and, if it is a real directory, everything below it.
    Symlinks are never descended into. File system errors are logged and end the
    iteration of the affected path instead of being raised.

    :param path: Entry to inspect
    :type path: pathlib.Path
    :param include_symlinks: Yield symlinks that resolve to an existing file or directory
    :type include_symlinks: bool
    :param include_directories: Yield (non-symlink) directories in addition to files
    :type include_directories: bool
    :return: generator of pathlib.Path objects
    '''
    try:
        if path.is_symlink():
            # is_file() / is_dir() follow the link, so dangling links are dropped here
            resolves = include_symlinks and (path.is_file() or path.is_dir())
            if resolves:
                yield path
            return
        if path.is_file():
            yield path
            return
        if not path.is_dir():
            # neither regular file nor directory: nothing to report
            return
        if include_directories:
            yield path
        for entry in path.iterdir():
            yield from _iterate_path_recursively(entry, include_symlinks, include_directories)
    except PermissionError:
        logging.error('Permission Error: could not access path {path}'.format(path=path.absolute()))
    except OSError:
        logging.warning('possible broken symlink: {path}'.format(path=path.absolute()))
from setuptools import setup, find_packages from setuptools import setup, find_packages
VERSION = '0.2.1' VERSION = '0.2.2'
setup( setup(
name='common_helper_files', name='common_helper_files',
......
nonexistent
\ No newline at end of file
test_folder
\ No newline at end of file
recursive_broken_link
\ No newline at end of file
import os import os
from tempfile import TemporaryDirectory
import unittest import unittest
from pathlib import Path
from tempfile import TemporaryDirectory
import pytest import pytest
from common_helper_files import get_binary_from_file, \ from common_helper_files import (
write_binary_to_file, delete_file, get_safe_name, \ create_symlink, delete_file, get_safe_name, get_binary_from_file, get_dir_of_file, get_directory_for_filename,
get_files_in_dir, get_string_list_from_file, \ get_dirs_in_dir, get_files_in_dir, get_string_list_from_file, safe_rglob, write_binary_to_file
get_dirs_in_dir, get_directory_for_filename, \ )
create_symlink, get_dir_of_file from common_helper_files.fail_safe_file_operations import _get_counted_file_path, _rm_cr
from common_helper_files.fail_safe_file_operations import _get_counted_file_path,\
_rm_cr
class Test_FailSafeFileOperations(unittest.TestCase): EMPTY_FOLDER = Path(get_directory_for_filename(__file__)).parent / 'tests' / 'data' / 'empty_folder'
EMPTY_FOLDER.mkdir(exist_ok=True)
class TestFailSafeFileOperations(unittest.TestCase):
def setUp(self): def setUp(self):
self.tmp_dir = TemporaryDirectory(prefix="test_common_helper_file") self.tmp_dir = TemporaryDirectory(prefix="test_common_helper_file")
...@@ -49,7 +53,7 @@ class Test_FailSafeFileOperations(unittest.TestCase): ...@@ -49,7 +53,7 @@ class Test_FailSafeFileOperations(unittest.TestCase):
read_binary = get_binary_from_file(file_path) read_binary = get_binary_from_file(file_path)
self.assertEqual(read_binary, b'this is a test', "written data not correct") self.assertEqual(read_binary, b'this is a test', "written data not correct")
# Test not overwrite flag # Test not overwrite flag
write_binary_to_file(b'do not overwirte', file_path, overwrite=False) write_binary_to_file(b'do not overwrite', file_path, overwrite=False)
read_binary = get_binary_from_file(file_path) read_binary = get_binary_from_file(file_path)
self.assertEqual(read_binary, b'this is a test', "written data not correct") self.assertEqual(read_binary, b'this is a test', "written data not correct")
# Test overwrite flag # Test overwrite flag
...@@ -100,8 +104,7 @@ class Test_FailSafeFileOperations(unittest.TestCase): ...@@ -100,8 +104,7 @@ class Test_FailSafeFileOperations(unittest.TestCase):
result = get_files_in_dir(test_dir_path) result = get_files_in_dir(test_dir_path)
self.assertIn(os.path.join(test_dir_path, "read_test"), result, "file in root folder not found") self.assertIn(os.path.join(test_dir_path, "read_test"), result, "file in root folder not found")
self.assertIn(os.path.join(test_dir_path, "test_folder/generic_test_file"), result, "file in sub folder not found") self.assertIn(os.path.join(test_dir_path, "test_folder/generic_test_file"), result, "file in sub folder not found")
print(result) self.assertEqual(len(result), 6, "number of found files not correct")
self.assertEqual(len(result), 4, "number of found files not correct")
def test_get_files_in_dir_error(self): def test_get_files_in_dir_error(self):
result = get_files_in_dir("/none_existing/dir") result = get_files_in_dir("/none_existing/dir")
...@@ -130,6 +133,31 @@ class Test_FailSafeFileOperations(unittest.TestCase): ...@@ -130,6 +133,31 @@ class Test_FailSafeFileOperations(unittest.TestCase):
self.assertEqual(absolute_file_path_result, self.tmp_dir.name) self.assertEqual(absolute_file_path_result, self.tmp_dir.name)
@pytest.mark.parametrize('symlinks, directories, expected_number', [
    (True, True, 7),
    (False, True, 5),
    (True, False, 5),
    (False, False, 3),
])
def test_safe_rglob(symlinks, directories, expected_number):
    '''
    The number of entries found below the test data folder must match the
    expectation for every combination of the two include flags.
    '''
    current_dir = Path(TestFailSafeFileOperations.get_directory_of_current_file())
    data_dir = current_dir.parent / 'tests' / 'data'
    found = list(safe_rglob(data_dir, include_symlinks=symlinks, include_directories=directories))
    assert len(found) == expected_number
def test_safe_rglob_invalid_path():
    '''
    A path that does not exist must produce an empty result.
    '''
    missing_path = Path('foo', 'bar')
    assert not missing_path.exists()
    found = list(safe_rglob(missing_path))
    assert len(found) == 0
def test_safe_rglob_empty_dir():
    '''
    An existing but empty directory must produce an empty result.
    '''
    assert EMPTY_FOLDER.exists()
    found = list(safe_rglob(EMPTY_FOLDER))
    assert len(found) == 0
@pytest.mark.parametrize('input_data, expected', [ @pytest.mark.parametrize('input_data, expected', [
('abc', 'abc'), ('abc', 'abc'),
('ab\r\nc', 'ab\nc')]) ('ab\r\nc', 'ab\nc')])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment