diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index d9b46df507dfd7..661490cd73c00d 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -862,6 +862,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(col_offset)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(command)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(comment_factory)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compile)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compile_mode)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(consts)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(context)); @@ -915,6 +916,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(entrypoint)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(env)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(errors)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(escape)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type)); @@ -943,6 +945,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filter)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filters)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(final)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(find)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(find_class)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fix_imports)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags)); @@ -1086,6 +1089,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_unnamed_fields)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name)); 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(names)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(narg)); @@ -1099,6 +1103,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nlocals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_depth)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_offset)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(normcase)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ns)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nstype)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nt)); @@ -1128,6 +1133,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(password)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pat)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(path)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pattern)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(peek)); @@ -1225,6 +1231,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict_mode)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(string)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sub)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sub_key)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 10773d7a6c7e3f..2882390f9780c2 100644 --- a/Include/internal/pycore_global_strings.h +++ 
b/Include/internal/pycore_global_strings.h @@ -351,6 +351,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(col_offset) STRUCT_FOR_ID(command) STRUCT_FOR_ID(comment_factory) + STRUCT_FOR_ID(compile) STRUCT_FOR_ID(compile_mode) STRUCT_FOR_ID(consts) STRUCT_FOR_ID(context) @@ -404,6 +405,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(entrypoint) STRUCT_FOR_ID(env) STRUCT_FOR_ID(errors) + STRUCT_FOR_ID(escape) STRUCT_FOR_ID(event) STRUCT_FOR_ID(eventmask) STRUCT_FOR_ID(exc_type) @@ -432,6 +434,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(filter) STRUCT_FOR_ID(filters) STRUCT_FOR_ID(final) + STRUCT_FOR_ID(find) STRUCT_FOR_ID(find_class) STRUCT_FOR_ID(fix_imports) STRUCT_FOR_ID(flags) @@ -575,6 +578,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(n_unnamed_fields) STRUCT_FOR_ID(name) STRUCT_FOR_ID(name_from) + STRUCT_FOR_ID(names) STRUCT_FOR_ID(namespace_separator) STRUCT_FOR_ID(namespaces) STRUCT_FOR_ID(narg) @@ -588,6 +592,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(nlocals) STRUCT_FOR_ID(node_depth) STRUCT_FOR_ID(node_offset) + STRUCT_FOR_ID(normcase) STRUCT_FOR_ID(ns) STRUCT_FOR_ID(nstype) STRUCT_FOR_ID(nt) @@ -617,6 +622,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(pages) STRUCT_FOR_ID(parent) STRUCT_FOR_ID(password) + STRUCT_FOR_ID(pat) STRUCT_FOR_ID(path) STRUCT_FOR_ID(pattern) STRUCT_FOR_ID(peek) @@ -714,6 +720,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(strict) STRUCT_FOR_ID(strict_mode) STRUCT_FOR_ID(string) + STRUCT_FOR_ID(sub) STRUCT_FOR_ID(sub_key) STRUCT_FOR_ID(symmetric_difference_update) STRUCT_FOR_ID(tabsize) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 618f8d0a36b6c3..ecd624e4bca02a 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -860,6 +860,7 @@ extern "C" { INIT_ID(col_offset), \ INIT_ID(command), \ INIT_ID(comment_factory), \ + INIT_ID(compile), \ INIT_ID(compile_mode), \ INIT_ID(consts), \ 
INIT_ID(context), \ @@ -913,6 +914,7 @@ extern "C" { INIT_ID(entrypoint), \ INIT_ID(env), \ INIT_ID(errors), \ + INIT_ID(escape), \ INIT_ID(event), \ INIT_ID(eventmask), \ INIT_ID(exc_type), \ @@ -941,6 +943,7 @@ extern "C" { INIT_ID(filter), \ INIT_ID(filters), \ INIT_ID(final), \ + INIT_ID(find), \ INIT_ID(find_class), \ INIT_ID(fix_imports), \ INIT_ID(flags), \ @@ -1084,6 +1087,7 @@ extern "C" { INIT_ID(n_unnamed_fields), \ INIT_ID(name), \ INIT_ID(name_from), \ + INIT_ID(names), \ INIT_ID(namespace_separator), \ INIT_ID(namespaces), \ INIT_ID(narg), \ @@ -1097,6 +1101,7 @@ extern "C" { INIT_ID(nlocals), \ INIT_ID(node_depth), \ INIT_ID(node_offset), \ + INIT_ID(normcase), \ INIT_ID(ns), \ INIT_ID(nstype), \ INIT_ID(nt), \ @@ -1126,6 +1131,7 @@ extern "C" { INIT_ID(pages), \ INIT_ID(parent), \ INIT_ID(password), \ + INIT_ID(pat), \ INIT_ID(path), \ INIT_ID(pattern), \ INIT_ID(peek), \ @@ -1223,6 +1229,7 @@ extern "C" { INIT_ID(strict), \ INIT_ID(strict_mode), \ INIT_ID(string), \ + INIT_ID(sub), \ INIT_ID(sub_key), \ INIT_ID(symmetric_difference_update), \ INIT_ID(tabsize), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index f848a002c3b5d1..e114fca09aefe8 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1204,6 +1204,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(compile); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(compile_mode); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1416,6 +1420,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); 
assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(escape); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(event); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1528,6 +1536,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(find); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(find_class); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2100,6 +2112,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(names); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(namespace_separator); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2152,6 +2168,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(normcase); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ns); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2268,6 +2288,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, 
&string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pat); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(path); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2656,6 +2680,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sub); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(sub_key); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 73acb1fe8d4106..1dc52f2575ae6c 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -16,24 +16,72 @@ __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] -def fnmatch(name, pat): - """Test whether FILENAME matches PATTERN. - - Patterns are Unix shell style: - - * matches everything - ? matches any single character - [seq] matches any character in seq - [!seq] matches any char not in seq - - An initial period in FILENAME is not special. - Both FILENAME and PATTERN are first case-normalized - if the operating system requires it. - If you don't want this, use fnmatchcase(FILENAME, PATTERN). - """ - name = os.path.normcase(name) - pat = os.path.normcase(pat) - return fnmatchcase(name, pat) +try: + from _fnmatch import filter +except ImportError: + def filter(names, pat): + """Construct a list from the names in *names* matching *pat*.""" + result = [] + pat = os.path.normcase(pat) + match = _compile_pattern(pat) + if os.path is posixpath: + # normcase on posix is NOP. Optimize it away from the loop. 
+ for name in names: + if match(name): + result.append(name) + else: + for name in names: + if match(os.path.normcase(name)): + result.append(name) + return result + +try: + from _fnmatch import fnmatch +except ImportError: + def fnmatch(name, pat): + """Test whether *name* matches *pat*. + + Patterns are Unix shell style: + + * matches everything + ? matches any single character + [seq] matches any character in seq + [!seq] matches any char not in seq + + An initial period in *name* is not special. + Both *name* and *pat* are first case-normalized + if the operating system requires it. + + If you don't want this, use fnmatchcase(name, pat). + """ + name = os.path.normcase(name) + pat = os.path.normcase(pat) + return fnmatchcase(name, pat) + +try: + from _fnmatch import fnmatchcase +except ImportError: + def fnmatchcase(name, pat): + """Test whether *name* matches *pat*, including case. + + This is a version of fnmatch() which doesn't case-normalize + its arguments. + """ + match = _compile_pattern(pat) + return match(name) is not None + +try: + from _fnmatch import translate +except ImportError: + def translate(pat): + """Translate a shell pattern *pat* to a regular expression. + + There is no way to quote meta-characters. + """ + + STAR = object() + parts = _translate(pat, STAR, '.') + return _join_translated_parts(parts, STAR) @functools.lru_cache(maxsize=32768, typed=True) def _compile_pattern(pat): @@ -45,43 +93,6 @@ def _compile_pattern(pat): res = translate(pat) return re.compile(res).match -def filter(names, pat): - """Construct a list from those elements of the iterable NAMES that match PAT.""" - result = [] - pat = os.path.normcase(pat) - match = _compile_pattern(pat) - if os.path is posixpath: - # normcase on posix is NOP. Optimize it away from the loop. 
- for name in names: - if match(name): - result.append(name) - else: - for name in names: - if match(os.path.normcase(name)): - result.append(name) - return result - -def fnmatchcase(name, pat): - """Test whether FILENAME matches PATTERN, including case. - - This is a version of fnmatch() which doesn't case-normalize - its arguments. - """ - match = _compile_pattern(pat) - return match(name) is not None - - -def translate(pat): - """Translate a shell PATTERN to a regular expression. - - There is no way to quote meta-characters. - """ - - STAR = object() - parts = _translate(pat, STAR, '.') - return _join_translated_parts(parts, STAR) - - def _translate(pat, STAR, QUESTION_MARK): res = [] add = res.append diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 10ed496d4e2f37..6ab244021ea20d 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -1,23 +1,29 @@ """Test cases for the fnmatch module.""" -import unittest +import itertools import os import string +import unittest import warnings -from fnmatch import fnmatch, fnmatchcase, translate, filter +import test.support.import_helper + +c_fnmatch = test.support.import_helper.import_fresh_module("_fnmatch") +py_fnmatch = test.support.import_helper.import_fresh_module("fnmatch", blocked=["_fnmatch"]) -class FnmatchTestCase(unittest.TestCase): +class FnmatchTestCaseMixin: + fnmatch = None - def check_match(self, filename, pattern, should_match=True, fn=fnmatch): - if should_match: - self.assertTrue(fn(filename, pattern), - "expected %r to match pattern %r" - % (filename, pattern)) - else: - self.assertFalse(fn(filename, pattern), - "expected %r not to match pattern %r" - % (filename, pattern)) + def check_match(self, filename, pattern, should_match=True, func=None): + if func is None: + func = self.fnmatch.fnmatch + + with self.subTest(fn=func, name=filename, pattern=pattern): + res = func(filename, pattern) + if should_match: + self.assertTrue(res, f"expected {filename!r} to match 
pattern {pattern!r}") + else: + self.assertFalse(res, f"expected {filename!r} not to match pattern {pattern!r}") def test_fnmatch(self): check = self.check_match @@ -54,13 +60,17 @@ def test_slow_fnmatch(self): check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False) def test_mix_bytes_str(self): + fnmatch = self.fnmatch.fnmatch self.assertRaises(TypeError, fnmatch, 'test', b'*') self.assertRaises(TypeError, fnmatch, b'test', '*') + + fnmatchcase = self.fnmatch.fnmatchcase self.assertRaises(TypeError, fnmatchcase, 'test', b'*') self.assertRaises(TypeError, fnmatchcase, b'test', '*') def test_fnmatchcase(self): check = self.check_match + fnmatchcase = self.fnmatch.fnmatchcase check('abc', 'abc', True, fnmatchcase) check('AbC', 'abc', False, fnmatchcase) check('abc', 'AbC', False, fnmatchcase) @@ -216,11 +226,18 @@ def test_warnings(self): check(',', '[a-z+--A-Z]') check('.', '[a-z--/A-Z]') +class PurePythonFnmatchTestCase(FnmatchTestCaseMixin, unittest.TestCase): + fnmatch = py_fnmatch + +class CPythonFnmatchTestCase(FnmatchTestCaseMixin, unittest.TestCase): + fnmatch = c_fnmatch -class TranslateTestCase(unittest.TestCase): +class TranslateTestCaseMixin: + fnmatch = None def test_translate(self): import re + translate = self.fnmatch.translate self.assertEqual(translate('*'), r'(?s:.*)\Z') self.assertEqual(translate('?'), r'(?s:.)\Z') self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z') @@ -250,32 +267,148 @@ def test_translate(self): self.assertTrue(re.match(fatre, 'cbabcaxc')) self.assertFalse(re.match(fatre, 'dabccbad')) -class FilterTestCase(unittest.TestCase): + def test_translate_wildcards(self): + for pattern, expect in [ + ('', r'(?s:)\Z'), + ('ab*', r'(?s:ab.*)\Z'), + ('ab*cd', r'(?s:ab.*cd)\Z'), + ('ab*cd*', r'(?s:ab(?>.*?cd).*)\Z'), + ('ab*cd*12', r'(?s:ab(?>.*?cd).*12)\Z'), + ('ab*cd*12*', r'(?s:ab(?>.*?cd)(?>.*?12).*)\Z'), + ('ab*cd*12*34', r'(?s:ab(?>.*?cd)(?>.*?12).*34)\Z'), + ('ab*cd*12*34*', r'(?s:ab(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'), + ]: + translated = 
self.fnmatch.translate(pattern) + self.assertEqual(translated, expect, pattern) + + for pattern, expect in [ + ('*ab', r'(?s:.*ab)\Z'), + ('*ab*', r'(?s:(?>.*?ab).*)\Z'), + ('*ab*cd', r'(?s:(?>.*?ab).*cd)\Z'), + ('*ab*cd*', r'(?s:(?>.*?ab)(?>.*?cd).*)\Z'), + ('*ab*cd*12', r'(?s:(?>.*?ab)(?>.*?cd).*12)\Z'), + ('*ab*cd*12*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*)\Z'), + ('*ab*cd*12*34', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*34)\Z'), + ('*ab*cd*12*34*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'), + ]: + translated = self.fnmatch.translate(pattern) + self.assertEqual(translated, expect, pattern) + + def test_translate_expressions(self): + for pattern, expect in [ + ('[', r'(?s:\[)\Z'), + ('[!', r'(?s:\[!)\Z'), + ('[]', r'(?s:\[\])\Z'), + ('[abc', r'(?s:\[abc)\Z'), + ('[!abc', r'(?s:\[!abc)\Z'), + ('[abc]', r'(?s:[abc])\Z'), + ('[!abc]', r'(?s:[^abc])\Z'), + # with [[ + ('[[', r'(?s:\[\[)\Z'), + ('[[a', r'(?s:\[\[a)\Z'), + ('[[]', r'(?s:[\[])\Z'), + ('[[]a', r'(?s:[\[]a)\Z'), + ('[[]]', r'(?s:[\[]\])\Z'), + ('[[]a]', r'(?s:[\[]a\])\Z'), + ('[[a]', r'(?s:[\[a])\Z'), + ('[[a]]', r'(?s:[\[a]\])\Z'), + ('[[a]b', r'(?s:[\[a]b)\Z'), + # backslashes + ('[\\', r'(?s:\[\\)\Z'), + (r'[\]', r'(?s:[\\])\Z'), + (r'[\\]', r'(?s:[\\\\])\Z'), + ]: + translated = self.fnmatch.translate(pattern) + self.assertEqual(translated, expect, pattern) + +class PurePythonTranslateTestCase(TranslateTestCaseMixin, unittest.TestCase): + fnmatch = py_fnmatch + +class CPythonTranslateTestCase(TranslateTestCaseMixin, unittest.TestCase): + fnmatch = c_fnmatch + + @staticmethod + def translate_func(pattern): + # Pure Python implementation of translate() + STAR = object() + parts = py_fnmatch._translate(pattern, STAR, '.') + return py_fnmatch._join_translated_parts(parts, STAR) + + def test_translate(self): + # We want to check that the C implementation is EXACTLY the same + # as the Python implementation. For that, we will need to cover + # a lot of cases. 
+ translate = self.fnmatch.translate + + for choice in itertools.combinations_with_replacement('*?.', 5): + for suffix in ['', '!']: + pat = suffix + ''.join(choice) + with self.subTest(pattern=pat): + self.assertEqual(translate(pat), self.translate_func(pat)) + + for pat in [ + '', + '!!a*', '!\\!a*', '!a*', '*', '**', '*******?', '*******c', '*****??', '**/', + '*.js', '*/man*/bash.*', '*???', '?', '?*****??', '?*****?c', '?***?****', + '?***?****?', '?***?****c', '?*?', '??', '???', '???*', '[!\\]', + '\\**', '\\*\\*', 'a*', 'a*****?c', 'a****c**?**??*****', 'a***c', + 'a**?**cd**?**??***k', 'a**?**cd**?**??***k**', 'a**?**cd**?**??k', + 'a**?**cd**?**??k***', 'a*[^c]', + 'a*cd**?**??k', 'a/*', 'a/**', 'a/**/b', + 'a/**/b/**/c', 'a/.*/c', 'a/?', 'a/??', 'a[X-]b', 'a[\\.]c', + 'a[\\b]c', 'a[bc', 'a\\*?/*', 'a\\*b/*', + 'ab[!de]', 'ab[cd]', 'ab[cd]ef', 'abc', 'b*/', 'foo*', + 'man/man1/bash.1' + ]: + with self.subTest(pattern=pat): + self.assertEqual(translate(pat), self.translate_func(pat)) + +class FilterTestCaseMixin: + fnmatch = None def test_filter(self): + filter = self.fnmatch.filter self.assertEqual(filter(['Python', 'Ruby', 'Perl', 'Tcl'], 'P*'), ['Python', 'Perl']) self.assertEqual(filter([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*'), [b'Python', b'Perl']) + def test_filter_iter_errors(self): + class BadList: + def __iter__(self): + yield 'abc' + raise ValueError("nope") + + with self.assertRaisesRegex(ValueError, r'^nope$'): + self.fnmatch.filter(BadList(), '*') + + def test_mix_bytes_str(self): + filter = self.fnmatch.filter self.assertRaises(TypeError, filter, ['test'], b'*') self.assertRaises(TypeError, filter, [b'test'], '*') def test_case(self): ignorecase = os.path.normcase('P') == os.path.normcase('p') + filter = self.fnmatch.filter self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'), ['Test.py', 'Test.PL'] if ignorecase else ['Test.py']) self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'), ['Test.py', 'Test.PL'] if 
ignorecase else ['Test.PL']) def test_sep(self): + filter = self.fnmatch.filter normsep = os.path.normcase('\\') == os.path.normcase('/') self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'), ['usr/bin', 'usr\\lib'] if normsep else ['usr/bin']) self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'), ['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib']) +class PurePythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase): + fnmatch = py_fnmatch + +class CPythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase): + fnmatch = c_fnmatch if __name__ == "__main__": unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 9ea7bc49be316c..497e854e10edcf 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -3139,6 +3139,7 @@ MODULE__CTYPES_TEST_DEPS=$(srcdir)/Modules/_ctypes/_ctypes_test_generated.c.h MODULE__CTYPES_MALLOC_CLOSURE=@MODULE__CTYPES_MALLOC_CLOSURE@ MODULE__DECIMAL_DEPS=$(srcdir)/Modules/_decimal/docstrings.h @LIBMPDEC_INTERNAL@ MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@ +MODULE__FNMATCH_DEPS=$(srcdir)/Modules/_fnmatch/macros.h $(srcdir)/Modules/_fnmatch/util.h MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_MD5.h Modules/_hacl/Hacl_Hash_MD5.c diff --git a/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst new file mode 100644 index 00000000000000..f374f28456d65d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst @@ -0,0 +1,2 @@ +Improve the performance of :func:`fnmatch.translate` by 2x and of +:func:`fnmatch.filter` by 1.1x. Patch by Bénédikt Tran.
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index dfc75077650df8..f33af67aa26499 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -33,6 +33,7 @@ @MODULE__BISECT_TRUE@_bisect _bisectmodule.c @MODULE__CONTEXTVARS_TRUE@_contextvars _contextvarsmodule.c @MODULE__CSV_TRUE@_csv _csv.c +@MODULE__FNMATCH_TRUE@_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/filter.c _fnmatch/translate.c @MODULE__HEAPQ_TRUE@_heapq _heapqmodule.c @MODULE__JSON_TRUE@_json _json.c @MODULE__LSPROF_TRUE@_lsprof _lsprof.c rotatingtree.c diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c new file mode 100644 index 00000000000000..4c05fa2d5d1b68 --- /dev/null +++ b/Modules/_fnmatch/_fnmatchmodule.c @@ -0,0 +1,470 @@ +/* + * C accelerator for the 'fnmatch' module. + * + * - Case normalization uses the runtime value of os.path.normcase(), + * forcing us to query the attribute each time. + * + * The C implementation of fnmatch.filter() uses the same os.path.normcase() + * when iterating over NAMES, ignoring side-effects on os.path.normcase() + * that may occur when processing a NAME in NAMES. + * + * More generally, os.path.normcase() is retrieved at most once per call + * to fnmatch.filter() or fnmatch.fnmatch(). + */ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "macros.h" +#include "util.h" // prototypes + +#include "pycore_runtime.h" // for _Py_ID() + +#include "clinic/_fnmatchmodule.c.h" + +#define LRU_CACHE_SIZE 32768 +#define INVALID_PATTERN_TYPE "pattern must be a string or a bytes object" + +// ==== Cached translation unit =============================================== + +/* + * Compile a UNIX shell pattern into a RE pattern + * and returns the corresponding 'match()' method. + * + * This function is LRU-cached by the module itself. 
+ */
+static PyObject *
+get_matcher_function_impl(PyObject *module, PyObject *pattern)
+{
+    // translate the shell pattern into a RE pattern (new reference or NULL)
+    assert(module != NULL);
+    PyObject *translated = fnmatch_translate_impl(module, pattern);
+    if (translated == NULL) {
+        return NULL;
+    }
+    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
+    // look up re.compile() at call time and compile the translated pattern
+    PyObject *compile_func = PyObject_GetAttr(st->re_module, &_Py_ID(compile));
+    if (compile_func == NULL) {
+        Py_DECREF(translated);
+        return NULL;
+    }
+    PyObject *compiled = PyObject_CallOneArg(compile_func, translated);
+    Py_DECREF(compile_func);
+    Py_DECREF(translated);
+    if (compiled == NULL) {
+        return NULL;
+    }
+    // return the compiled pattern's 'match' attribute (NULL if lookup fails)
+    PyObject *matcher = PyObject_GetAttr(compiled, &_Py_ID(match));
+    Py_DECREF(compiled);
+    return matcher;
+}
+
+static PyMethodDef get_matcher_function_def = {
+    "get_matcher_function",         // ml_name
+    get_matcher_function_impl,      // ml_meth
+    METH_O,                         // ml_flags: exactly one argument
+    NULL                            // ml_doc
+};
+
+static int
+fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
+{
+    // called once: sets st->translator; returns 0 on success, -1 on failure
+    assert(st->translator == NULL);
+    PyObject *maxsize = PyLong_FromLong(LRU_CACHE_SIZE);
+    if (maxsize == NULL) {
+        return -1;
+    }
+    PyObject *cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (cache == NULL) {
+        Py_DECREF(maxsize);
+        return -1;
+    }
+    PyObject *args[3] = {NULL, maxsize, Py_True};
+    size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;  // args[0] is spare
+    PyObject *wrapper = PyObject_Vectorcall(cache, &args[1], nargsf, NULL);
+    Py_DECREF(maxsize);
+    Py_DECREF(cache);
+    if (wrapper == NULL) {
+        return -1;
+    }
+    assert(module != NULL);
+    PyObject *wrapped = PyCFunction_New(&get_matcher_function_def, module);
+    // 'wrapped' may be NULL on failure; 'translator' is released on cleanup
+    st->translator = wrapped ? PyObject_CallOneArg(wrapper, wrapped) : NULL;
+    Py_XDECREF(wrapped);
+    Py_DECREF(wrapper);
+    if (st->translator == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+// ==== Cached re.escape() unit ===============================================
+
+/* Create an LRU-cached function for re.escape(). */
+static int
+fnmatchmodule_load_escapefunc(PyObject *Py_UNUSED(module),
+                              fnmatchmodule_state *st)
+{
+    // called once: sets st->re_escape; returns 0 on success, -1 on failure
+    assert(st->re_escape == NULL);
+    PyObject *maxsize = PyLong_FromLong(LRU_CACHE_SIZE);
+    if (maxsize == NULL) {
+        return -1;
+    }
+    PyObject *cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (cache == NULL) {
+        Py_DECREF(maxsize);
+        return -1;
+    }
+    PyObject *wrapper = PyObject_CallOneArg(cache, maxsize);
+    Py_DECREF(maxsize);
+    Py_DECREF(cache);
+    if (wrapper == NULL) {
+        return -1;
+    }
+    assert(st->re_module != NULL);
+    PyObject *wrapped = PyObject_GetAttr(st->re_module, &_Py_ID(escape));
+    // 'wrapped' may be NULL on failure; 're_escape' is released on cleanup
+    st->re_escape = wrapped ? PyObject_CallOneArg(wrapper, wrapped) : NULL;
+    Py_XDECREF(wrapped);
+    Py_DECREF(wrapper);
+    if (st->re_escape == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+// ==== Cached re.sub() unit for set operation tokens =========================
+
+/* Store the bound method re.compile('([&~|])').sub in the module state.
*/ +static int +fnmatchmodule_load_setops_re_sub(PyObject *Py_UNUSED(module), + fnmatchmodule_state *st) +{ + // make sure that this function is called once + assert(st->setops_re_subfn == NULL); + PyObject *pattern = PyUnicode_FromString("([&~|])"); + CHECK_NOT_NULL_OR_ABORT(pattern); + PyObject *compiled = PyObject_CallMethodOneArg(st->re_module, + &_Py_ID(compile), + pattern); + Py_DECREF(pattern); + CHECK_NOT_NULL_OR_ABORT(compiled); + st->setops_re_subfn = PyObject_GetAttr(compiled, &_Py_ID(sub)); + Py_DECREF(compiled); + CHECK_NOT_NULL_OR_ABORT(st->setops_re_subfn); + return 0; +abort: + return -1; +} + +// ==== Module data getters =================================================== + +static inline PyObject * /* reference to re.compile(pattern).match() */ +get_matcher_function(PyObject *module, PyObject *pattern) +{ + fnmatchmodule_state *st = get_fnmatchmodule_state(module); + assert(st->translator != NULL); + return PyObject_CallOneArg(st->translator, pattern); +} + +static inline PyObject * /* reference to os.path.normcase() */ +get_platform_normcase_function(PyObject *module) +{ + fnmatchmodule_state *st = get_fnmatchmodule_state(module); + PyObject *os_path = PyObject_GetAttr(st->os_module, &_Py_ID(path)); + if (os_path == NULL) { + return NULL; + } + PyObject *normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase)); + Py_DECREF(os_path); + return normcase; +} + +// ==== Module state functions ================================================ + +static int +fnmatchmodule_exec(PyObject *module) +{ + // ---- def local macros -------------------------------------------------- + /* Import a named module and store it in 'STATE->ATTRIBUTE'. 
*/ +#define IMPORT_MODULE(STATE, ATTRIBUTE, MODULE_NAME) \ + do { \ + /* make sure that the attribute is initialized once */ \ + assert(STATE->ATTRIBUTE == NULL); \ + STATE->ATTRIBUTE = PyImport_ImportModule((MODULE_NAME)); \ + CHECK_NOT_NULL_OR_ABORT(STATE->ATTRIBUTE); \ + } while (0) + /* Intern a literal STRING and store it in 'STATE->ATTRIBUTE'. */ +#define INTERN_STRING(STATE, ATTRIBUTE, STRING) \ + do { \ + /* make sure that the attribute is initialized once */ \ + assert(STATE->ATTRIBUTE == NULL); \ + STATE->ATTRIBUTE = PyUnicode_InternFromString((STRING)); \ + CHECK_NOT_NULL_OR_ABORT(STATE->ATTRIBUTE); \ + } while (0) + // ------------------------------------------------------------------------ + fnmatchmodule_state *st = get_fnmatchmodule_state(module); + IMPORT_MODULE(st, os_module, "os"); + IMPORT_MODULE(st, re_module, "re"); + CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_translator(module, st)); + CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_escapefunc(module, st)); + INTERN_STRING(st, hyphen_str, "-"); + INTERN_STRING(st, hyphen_esc_str, "\\-"); + INTERN_STRING(st, backslash_str, "\\"); + INTERN_STRING(st, backslash_esc_str, "\\\\"); + CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_setops_re_sub(module, st)); + INTERN_STRING(st, setops_repl_str, "\\\\\\1"); + return 0; +abort: + return -1; +#undef INTERN_STRING +#undef IMPORT_MODULE +} + +static int +fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg) +{ + fnmatchmodule_state *st = get_fnmatchmodule_state(m); + Py_VISIT(st->setops_repl_str); + Py_VISIT(st->setops_re_subfn); + Py_VISIT(st->backslash_esc_str); + Py_VISIT(st->backslash_str); + Py_VISIT(st->hyphen_esc_str); + Py_VISIT(st->hyphen_str); + Py_VISIT(st->re_escape); + Py_VISIT(st->translator); + Py_VISIT(st->re_module); + Py_VISIT(st->os_module); + return 0; +} + +static int +fnmatchmodule_clear(PyObject *m) +{ + fnmatchmodule_state *st = get_fnmatchmodule_state(m); + Py_CLEAR(st->setops_repl_str); + Py_CLEAR(st->setops_re_subfn); + 
Py_CLEAR(st->backslash_esc_str); + Py_CLEAR(st->backslash_str); + Py_CLEAR(st->hyphen_esc_str); + Py_CLEAR(st->hyphen_str); + Py_CLEAR(st->re_escape); + Py_CLEAR(st->translator); + Py_CLEAR(st->re_module); + Py_CLEAR(st->os_module); + return 0; +} + +static inline void +fnmatchmodule_free(void *m) +{ + (void)fnmatchmodule_clear((PyObject *)m); +} + +/*[clinic input] +module fnmatch +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=797aa965370a9ef2]*/ + +/*[clinic input] +fnmatch.filter -> object + + names: object + pat as pattern: object + +Construct a list from the names in *names* matching *pat*. + +[clinic start generated code]*/ + +static PyObject * +fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern) +/*[clinic end generated code: output=1a68530a2e3cf7d0 input=7ac729daad3b1404]*/ +{ + PyObject *normcase = NULL; // for the 'goto abort' statements + normcase = get_platform_normcase_function(module); + CHECK_NOT_NULL_OR_ABORT(normcase); + PyObject *normalized_pattern = PyObject_CallOneArg(normcase, pattern); + CHECK_NOT_NULL_OR_ABORT(normalized_pattern); + // the matcher is cached with respect to the *normalized* pattern + PyObject *matcher = get_matcher_function(module, normalized_pattern); + Py_DECREF(normalized_pattern); + CHECK_NOT_NULL_OR_ABORT(matcher); + PyObject *filtered = _Py_fnmatch_filter(matcher, names, normcase); + Py_DECREF(matcher); + Py_DECREF(normcase); + return filtered; +abort: + Py_XDECREF(normcase); + return NULL; +} + +/*[clinic input] +fnmatch.fnmatch -> bool + + name: object + pat as pattern: object + +Test whether *name* matches *pat*. + +Patterns are Unix shell style: + +* matches everything +? matches any single character +[seq] matches any character in seq +[!seq] matches any char not in seq + +An initial period in *name* is not special. +Both *name* and *pat* are first case-normalized +if the operating system requires it. 
+ +If you don't want this, use fnmatchcase(name, pat). + +[clinic start generated code]*/ + +static int +fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern) +/*[clinic end generated code: output=c9dc542e8d6933b6 input=279a4a4f2ddea6a2]*/ +{ + PyObject *normcase = get_platform_normcase_function(module); + if (normcase == NULL) { + return -1; + } + // apply case normalization on both arguments + PyObject *norm_name = PyObject_CallOneArg(normcase, name); + if (norm_name == NULL) { + Py_DECREF(normcase); + return -1; + } + PyObject *norm_pattern = PyObject_CallOneArg(normcase, pattern); + Py_DECREF(normcase); + if (norm_pattern == NULL) { + Py_DECREF(norm_name); + return -1; + } + int matching = fnmatch_fnmatchcase_impl(module, norm_name, norm_pattern); + Py_DECREF(norm_pattern); + Py_DECREF(norm_name); + return matching; +} + +/*[clinic input] +fnmatch.fnmatchcase -> bool + + name: object + pat as pattern: object + +Test whether *name* matches *pat*, including case. + +This is a version of fnmatch() which doesn't case-normalize +its arguments. +[clinic start generated code]*/ + +static int +fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern) +/*[clinic end generated code: output=4d6b268169001876 input=91d62999c08fd55e]*/ +{ + // fnmatchcase() does not apply any case normalization on the inputs + PyObject *matcher = get_matcher_function(module, pattern); + if (matcher == NULL) { + return -1; + } + // If 'name' is of incorrect type, it will be detected when calling + // the matcher function (we check 're.compile(pattern).match(name)'). + PyObject *match = PyObject_CallOneArg(matcher, name); + Py_DECREF(matcher); + int matching = match == NULL ? -1 : !Py_IsNone(match); + Py_XDECREF(match); + return matching; +} + +/*[clinic input] +fnmatch.translate -> object + + pat as pattern: object + +Translate a shell pattern *pat* to a regular expression. + +There is no way to quote meta-characters. 
+[clinic start generated code]*/ + +static PyObject * +fnmatch_translate_impl(PyObject *module, PyObject *pattern) +/*[clinic end generated code: output=77e0f5de9fbb59bd input=2cc1203a34c571fd]*/ +{ + if (PyBytes_Check(pattern)) { + PyObject *decoded = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(pattern), + PyBytes_GET_SIZE(pattern), + "strict"); + CHECK_NOT_NULL_OR_ABORT(decoded); + PyObject *translated = _Py_fnmatch_translate(module, decoded); + Py_DECREF(decoded); + CHECK_NOT_NULL_OR_ABORT(translated); + PyObject *res = PyUnicode_AsLatin1String(translated); + Py_DECREF(translated); + return res; + } + else if (PyUnicode_Check(pattern)) { + return _Py_fnmatch_translate(module, pattern); + } + else { + PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE); + return NULL; + } +abort: + return NULL; +} + +// ==== Module specs ========================================================== + +// fmt: off +PyDoc_STRVAR(fnmatchmodule_doc, +"Filename matching with shell patterns.\n" +"fnmatch(FILENAME, PATTERN) matches according to the local convention.\n" +"fnmatchcase(FILENAME, PATTERN) always takes case in account.\n\n" +"The functions operate by translating the pattern into a regular\n" +"expression. They cache the compiled regular expressions for speed.\n\n" +"The function translate(PATTERN) returns a regular expression\n" +"corresponding to PATTERN. 
(It does not compile it.)"); +// fmt: on + +static PyMethodDef fnmatchmodule_methods[] = { + FNMATCH_FILTER_METHODDEF + FNMATCH_FNMATCH_METHODDEF + FNMATCH_FNMATCHCASE_METHODDEF + FNMATCH_TRANSLATE_METHODDEF + {NULL, NULL} +}; + +static struct PyModuleDef_Slot fnmatchmodule_slots[] = { + {Py_mod_exec, fnmatchmodule_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, +}; + +static struct PyModuleDef _fnmatchmodule = { + PyModuleDef_HEAD_INIT, + .m_name = "_fnmatch", + .m_doc = fnmatchmodule_doc, + .m_size = sizeof(fnmatchmodule_state), + .m_methods = fnmatchmodule_methods, + .m_slots = fnmatchmodule_slots, + .m_traverse = fnmatchmodule_traverse, + .m_clear = fnmatchmodule_clear, + .m_free = fnmatchmodule_free, +}; + +PyMODINIT_FUNC +PyInit__fnmatch(void) +{ + return PyModuleDef_Init(&_fnmatchmodule); +} + +#undef INVALID_PATTERN_TYPE +#undef COMPILED_CACHE_SIZE diff --git a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h new file mode 100644 index 00000000000000..c611f01673b326 --- /dev/null +++ b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h @@ -0,0 +1,264 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(fnmatch_filter__doc__, +"filter($module, /, names, pat)\n" +"--\n" +"\n" +"Construct a list from the names in *names* matching *pat*."); + +#define FNMATCH_FILTER_METHODDEF \ + {"filter", _PyCFunction_CAST(fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, fnmatch_filter__doc__}, + +static PyObject * +fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern); + +static PyObject * +fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = 
NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(names), &_Py_ID(pat), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"names", "pat", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "filter", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *names; + PyObject *pattern; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf); + if (!args) { + goto exit; + } + names = args[0]; + pattern = args[1]; + return_value = fnmatch_filter_impl(module, names, pattern); + +exit: + return return_value; +} + +PyDoc_STRVAR(fnmatch_fnmatch__doc__, +"fnmatch($module, /, name, pat)\n" +"--\n" +"\n" +"Test whether *name* matches *pat*.\n" +"\n" +"Patterns are Unix shell style:\n" +"\n" +"* matches everything\n" +"? 
matches any single character\n" +"[seq] matches any character in seq\n" +"[!seq] matches any char not in seq\n" +"\n" +"An initial period in *name* is not special.\n" +"Both *name* and *pat* are first case-normalized\n" +"if the operating system requires it.\n" +"\n" +"If you don\'t want this, use fnmatchcase(name, pat)."); + +#define FNMATCH_FNMATCH_METHODDEF \ + {"fnmatch", _PyCFunction_CAST(fnmatch_fnmatch), METH_FASTCALL|METH_KEYWORDS, fnmatch_fnmatch__doc__}, + +static int +fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern); + +static PyObject * +fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(name), &_Py_ID(pat), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"name", "pat", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "fnmatch", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *name; + PyObject *pattern; + int _return_value; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf); + if (!args) { + goto exit; + } + name = args[0]; + pattern = args[1]; + _return_value = fnmatch_fnmatch_impl(module, name, pattern); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(fnmatch_fnmatchcase__doc__, +"fnmatchcase($module, /, name, pat)\n" +"--\n" +"\n" +"Test whether *name* matches *pat*, including case.\n" 
+"\n" +"This is a version of fnmatch() which doesn\'t case-normalize\n" +"its arguments."); + +#define FNMATCH_FNMATCHCASE_METHODDEF \ + {"fnmatchcase", _PyCFunction_CAST(fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, fnmatch_fnmatchcase__doc__}, + +static int +fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern); + +static PyObject * +fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(name), &_Py_ID(pat), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"name", "pat", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "fnmatchcase", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *name; + PyObject *pattern; + int _return_value; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf); + if (!args) { + goto exit; + } + name = args[0]; + pattern = args[1]; + _return_value = fnmatch_fnmatchcase_impl(module, name, pattern); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(fnmatch_translate__doc__, +"translate($module, /, pat)\n" +"--\n" +"\n" +"Translate a shell pattern *pat* to a regular expression.\n" +"\n" +"There is no way to quote meta-characters."); + +#define FNMATCH_TRANSLATE_METHODDEF \ + {"translate", _PyCFunction_CAST(fnmatch_translate), METH_FASTCALL|METH_KEYWORDS, fnmatch_translate__doc__}, + 
+static PyObject * +fnmatch_translate_impl(PyObject *module, PyObject *pattern); + +static PyObject * +fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(pat), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pat", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "translate", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *pattern; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + pattern = args[0]; + return_value = fnmatch_translate_impl(module, pattern); + +exit: + return return_value; +} +/*[clinic end generated code: output=50f858ef4bfb569a input=a9049054013a1b77]*/ diff --git a/Modules/_fnmatch/filter.c b/Modules/_fnmatch/filter.c new file mode 100644 index 00000000000000..bd1d6c8ec85073 --- /dev/null +++ b/Modules/_fnmatch/filter.c @@ -0,0 +1,48 @@ +/* + * Provide the implementation of the high-level matcher-based functions. 
+ */ + +#include "Python.h" + +PyObject * +_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normcase) +{ + assert(normcase != NULL); + PyObject *iter = PyObject_GetIter(names); + if (iter == NULL) { + return NULL; + } + PyObject *res = PyList_New(0); + if (res == NULL) { + Py_DECREF(iter); + return NULL; + } + PyObject *name = NULL; + while ((name = PyIter_Next(iter))) { + PyObject *normalized = PyObject_CallOneArg(normcase, name); + if (normalized == NULL) { + goto abort; + } + PyObject *match = PyObject_CallOneArg(matcher, normalized); + Py_DECREF(normalized); + if (match == NULL) { + goto abort; + } + int matching = Py_IsNone(match) == 0; + Py_DECREF(match); + if (matching && PyList_Append(res, name) < 0) { + goto abort; + } + Py_DECREF(name); + } + Py_DECREF(iter); + if (PyErr_Occurred()) { + Py_CLEAR(res); + } + return res; +abort: + Py_DECREF(name); + Py_DECREF(iter); + Py_DECREF(res); + return NULL; +} diff --git a/Modules/_fnmatch/macros.h b/Modules/_fnmatch/macros.h new file mode 100644 index 00000000000000..a39586338ea62a --- /dev/null +++ b/Modules/_fnmatch/macros.h @@ -0,0 +1,132 @@ +/* + * This file contains various macro definitions in order to reduce the + * number of lines in '_fnmatch'. Do not use them for something else. + */ + +#ifndef _FNMATCH_MACROS_H +#define _FNMATCH_MACROS_H + +// ==== Macro definitions ===================================================== + +/* + * Check that STATUS is >= 0 or execute 'goto abort'. + * + * This macro is provided for convenience and should be + * carefully used if more resources should be released + * before jumping to the 'abort' label. + */ +#define CHECK_RET_CODE_OR_ABORT(STATUS) \ + do { \ + if ((STATUS) < 0) { \ + assert(PyErr_Occurred()); \ + goto abort; \ + } \ + } while (0) + +/* + * Identical to CHECK_RET_CODE_OR_ABORT but where the + * argument is semantically used as a positive integer. 
+ */ +#define CHECK_UNSIGNED_INT_OR_ABORT CHECK_RET_CODE_OR_ABORT + +/* + * Check that OBJ is not NULL or execute 'goto abort'. + * + * This macro is provided for convenience and should be + * carefully used if more resources should be released + * before jumping to the 'abort' label. + */ +#define CHECK_NOT_NULL_OR_ABORT(OBJ) \ + do { \ + if ((OBJ) == NULL) { \ + goto abort; \ + } \ + } while (0) + +// The following _WRITE_* and _WRITE_*_OR macros do NOT check their inputs +// since they directly delegate to the _PyUnicodeWriter_Write* underlying +// function. In particular, the caller is responsible for type safety. + +/* Cast WRITER and call _PyUnicodeWriter_WriteChar(). */ +#define _WRITE_CHAR(WRITER, CHAR) \ + _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(WRITER), (CHAR)) + +/* Cast WRITER and call _PyUnicodeWriter_WriteASCIIString(). */ +#define _WRITE_ASCII(WRITER, STRING, LENGTH) \ + _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(WRITER), \ + (STRING), (LENGTH)) + +/* Cast WRITER and call _PyUnicodeWriter_WriteStr(). */ +#define _WRITE_STRING(WRITER, STRING) \ + _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(WRITER), (STRING)) + +/* Cast WRITER and call _PyUnicodeWriter_WriteSubstring(). */ +#define _WRITE_SUBSTRING(WRITER, STRING, START, STOP) \ + _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(WRITER), \ + (STRING), (START), (STOP)) + +// ---------------------------------------------------------------------------- + +/* Write the character CHAR or jump to the 'abort' label on failure. */ +#define WRITE_CHAR_OR_ABORT(WRITER, CHAR) \ + CHECK_RET_CODE_OR_ABORT(_WRITE_CHAR((WRITER), (CHAR))) + +/* + * Write an ASCII string STRING of given length LENGTH, + * or jump to the 'abort' label on failure. + */ +#define WRITE_ASCII_OR_ABORT(WRITER, ASCII, LENGTH) \ + CHECK_RET_CODE_OR_ABORT(_WRITE_ASCII((WRITER), (ASCII), (LENGTH))) + +/* Write the string STRING or jump to the 'abort' label on failure. 
*/ +#define WRITE_STRING_OR_ABORT(WRITER, STRING) \ + CHECK_RET_CODE_OR_ABORT(_WRITE_STRING((WRITER), (STRING))) + +/* + * Write the substring STRING[START:STOP] (no-op if empty) + * or jump to the 'abort' label on failure. + */ +#define WRITE_SUBSTRING_OR_ABORT(WRITER, STRING, START, STOP) \ + do { \ + const Py_ssize_t _START = (START); \ + const Py_ssize_t _STOP = (STOP); \ + int _RC = _WRITE_SUBSTRING((WRITER), (STRING), _START, _STOP); \ + CHECK_RET_CODE_OR_ABORT(_RC); \ + } while (0) + +// ---------------------------------------------------------------------------- + +/* Replace backslashes in STRING by escaped backslashes. */ +#define BACKSLASH_REPLACE(STATE, STRING) \ + PyObject_CallMethodObjArgs( \ + (STRING), \ + &_Py_ID(replace), \ + (STATE)->backslash_str, \ + (STATE)->backslash_esc_str, \ + NULL \ + ) + +/* Replace hyphens in STRING by escaped hyphens. */ +#define HYPHEN_REPLACE(STATE, STRING) \ + PyObject_CallMethodObjArgs( \ + (STRING), \ + &_Py_ID(replace), \ + (STATE)->hyphen_str, \ + (STATE)->hyphen_esc_str, \ + NULL \ + ) + +/* + * Escape set operations in STRING using re.sub(). + * + * SETOPS_RE_SUB_METH is a reference to re.compile('([&~|])').sub(). + */ +#define SETOPS_REPLACE(STATE, STRING, SETOPS_RE_SUB_METH) \ + PyObject_CallFunctionObjArgs( \ + (SETOPS_RE_SUB_METH), \ + (STATE)->setops_repl_str, \ + (STRING), \ + NULL \ + ) + +#endif // _FNMATCH_MACROS_H diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c new file mode 100644 index 00000000000000..3dbd0d59d094d2 --- /dev/null +++ b/Modules/_fnmatch/translate.c @@ -0,0 +1,563 @@ +/* + * C accelerator for the translation function from UNIX shell patterns + * to RE patterns. 
+ */ + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "macros.h" +#include "util.h" // for get_fnmatchmodulestate_state() + +#include "pycore_runtime.h" // for _Py_ID() + +// ==== Helper declarations =================================================== + +/* + * Write re.escape(ch). + * + * This returns the number of written characters, or -1 if an error occurred. + */ +static Py_ssize_t +escape_char(fnmatchmodule_state *state, PyUnicodeWriter *writer, Py_UCS4 ch); + +/* + * Construct a regular expression out of a UNIX-style expression. + * + * The expression to translate is the content of an '[(BLOCK)]' expression, + * which contains single unicode characters or character ranges (e.g., 'a-z'). + * + * By convention, 'start' and 'stop' represent the INCLUSIVE start index + * and EXCLUSIVE stop index of BLOCK in 'pattern'. Stated otherwise: + * + * pattern[start] == BLOCK[0] + * pattern[stop] == ']' + * + * For instance, for "ab[c-f]g[!1-5]", the values of 'start' and 'stop' + * for the sub-pattern '[c-f]' are 3 and 6 respectively, while their + * values for '[!1-5]' are 9 and 13 respectively. + * + * The 'pattern_str_find_meth' argument is a reference to pattern.find(). + */ +static PyObject * +translate_expression(fnmatchmodule_state *state, + PyObject *pattern, Py_ssize_t start, Py_ssize_t stop, + PyObject *pattern_str_find_meth); + +/* + * Write the translated pattern obtained by translate_expression(). + * + * This returns the number of written characters, or -1 if an error occurred. + */ +static Py_ssize_t +write_expression(fnmatchmodule_state *state, + PyUnicodeWriter *writer, PyObject *expression); + +/* + * Build the final regular expression by processing the wildcards. + * + * The position of each wildcard in 'pattern' is given by 'indices'. 
+ */ +static PyObject * +process_wildcards(PyObject *pattern, PyObject *indices); + +// ==== API implementation ==================================================== + +PyObject * +_Py_fnmatch_translate(PyObject *module, PyObject *pattern) +{ + assert(PyUnicode_Check(pattern)); + fnmatchmodule_state *state = get_fnmatchmodule_state(module); + const Py_ssize_t maxind = PyUnicode_GET_LENGTH(pattern); + + // We would write less data if there are successive '*', + // which usually happens once or twice in the pattern. + // Otherwise, we write >= maxind characters since escaping + // them always add more characters. + // + // Note that only '()[]{}?*+-|^$\\.&~# \t\n\r\v\f' need to + // be escaped when translated to RE patterns and '*' and '?' + // are already handled without being escaped. + // + // In general, UNIX style patterns are more likely to contain + // wildcards than characters to be escaped, with the exception + // of '-', '\' and '~' (we usually want to match filenmaes), + // and there is a sparse number of them. Therefore, we only + // estimate the number of characters to be written to be the + // same as the number of characters in the pattern. 
+ PyUnicodeWriter *writer = PyUnicodeWriter_Create(maxind); + if (writer == NULL) { + return NULL; + } + + // ---- decl local objects ------------------------------------------------ + // list containing the indices where '*' has a special meaning + PyObject *wildcard_indices = NULL; + // call-level cached functions + PyObject *pattern_str_find_meth = NULL; // pattern.find() + // ---- def local objects ------------------------------------------------- + wildcard_indices = PyList_New(0); + CHECK_NOT_NULL_OR_ABORT(wildcard_indices); + pattern_str_find_meth = PyObject_GetAttr(pattern, &_Py_ID(find)); + CHECK_NOT_NULL_OR_ABORT(pattern_str_find_meth); + // ------------------------------------------------------------------------ + const unsigned int pattern_kind = PyUnicode_KIND(pattern); + const void *const pattern_data = PyUnicode_DATA(pattern); + // ---- def local macros -------------------------------------------------- +#define READ_CHAR(IND) PyUnicode_READ(pattern_kind, pattern_data, IND) + /* advance IND if the character is CHAR */ +#define ADVANCE_IF_CHAR_IS(CHAR, IND, MAXIND) \ + do { \ + if ((IND) < (MAXIND) && READ_CHAR(IND) == (CHAR)) { \ + ++IND; \ + } \ + } while (0) + // ------------------------------------------------------------------------ + Py_ssize_t i = 0; // current index + Py_ssize_t written = 0; // number of characters written + while (i < maxind) { + Py_UCS4 chr = READ_CHAR(i++); + switch (chr) { + case '*': { + // translate wildcard '*' (fnmatch) into optional '.' (regex) + WRITE_CHAR_OR_ABORT(writer, '*'); + // skip duplicated '*' + for (; i < maxind && READ_CHAR(i) == '*'; ++i); + // store the position of the wildcard + PyObject *wildcard_index = PyLong_FromSsize_t(written++); + CHECK_NOT_NULL_OR_ABORT(wildcard_index); + int rc = PyList_Append(wildcard_indices, wildcard_index); + Py_DECREF(wildcard_index); + CHECK_RET_CODE_OR_ABORT(rc); + break; + } + case '?': { + // translate optional '?' (fnmatch) into optional '.' 
(regex) + WRITE_CHAR_OR_ABORT(writer, '.'); + ++written; // increase the expected result's length + break; + } + case '[': { + assert(READ_CHAR(i - 1) == '['); + Py_ssize_t j = i; + ADVANCE_IF_CHAR_IS('!', j, maxind); // [! + ADVANCE_IF_CHAR_IS(']', j, maxind); // [!] or [] + for (; j < maxind && READ_CHAR(j) != ']'; ++j); // locate ']' + if (j >= maxind) { + WRITE_ASCII_OR_ABORT(writer, "\\[", 2); + written += 2; // we just wrote 2 characters + break; // explicit early break for clarity + } + else { + assert(READ_CHAR(j) == ']'); + Py_ssize_t pos = PyUnicode_FindChar(pattern, '-', i, j, 1); + if (pos == -2) { + goto abort; + } + PyObject *expr = NULL; + if (pos == -1) { + PyObject *tmp = PyUnicode_Substring(pattern, i, j); + CHECK_NOT_NULL_OR_ABORT(tmp); + expr = BACKSLASH_REPLACE(state, tmp); + Py_DECREF(tmp); + } + else { + expr = translate_expression(state, pattern, i, j, + pattern_str_find_meth); + } + CHECK_NOT_NULL_OR_ABORT(expr); + Py_ssize_t expr_len = write_expression(state, writer, expr); + Py_DECREF(expr); + CHECK_UNSIGNED_INT_OR_ABORT(expr_len); + written += expr_len; + i = j + 1; // jump to the character after ']' + break; // explicit early break for clarity + } + } + default: { + Py_ssize_t t = escape_char(state, writer, chr); + CHECK_UNSIGNED_INT_OR_ABORT(t); + written += t; + break; + } + } + } +#undef ADVANCE_IF_CHAR_IS +#undef READ_CHAR + Py_DECREF(pattern_str_find_meth); + PyObject *translated = PyUnicodeWriter_Finish(writer); + if (translated == NULL) { + Py_DECREF(wildcard_indices); + return NULL; + } + PyObject *res = process_wildcards(translated, wildcard_indices); + Py_DECREF(translated); + Py_DECREF(wildcard_indices); + return res; +abort: + Py_XDECREF(pattern_str_find_meth); + Py_XDECREF(wildcard_indices); + PyUnicodeWriter_Discard(writer); + return NULL; +} + +// ==== Helper implementations ================================================ + +/* taken from unicodeobject.c */ +static inline PyObject * +unicode_char(Py_UCS4 ch) +{ +#define 
MAX_UNICODE 0x10ffff + assert(ch <= MAX_UNICODE); +#undef MAX_UNICODE + if (ch < 256) { + return _Py_LATIN1_CHR(ch); + } + PyObject *unicode = PyUnicode_New(1, ch); + if (unicode == NULL) { + return NULL; + } + assert(PyUnicode_KIND(unicode) != PyUnicode_1BYTE_KIND); + if (PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND) { + PyUnicode_2BYTE_DATA(unicode)[0] = (Py_UCS2)ch; + } + else { + assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND); + PyUnicode_4BYTE_DATA(unicode)[0] = ch; + } + assert(_PyUnicode_CheckConsistency(unicode, 1)); + return unicode; +} + +static Py_ssize_t +escape_char(fnmatchmodule_state *state, PyUnicodeWriter *writer, Py_UCS4 ch) +{ + PyObject *str = unicode_char(ch); + CHECK_NOT_NULL_OR_ABORT(str); + PyObject *escaped = PyObject_CallOneArg(state->re_escape, str); + Py_DECREF(str); + CHECK_NOT_NULL_OR_ABORT(escaped); + Py_ssize_t written = PyUnicode_GET_LENGTH(escaped); + int rc = _WRITE_STRING(writer, escaped); + Py_DECREF(escaped); + CHECK_RET_CODE_OR_ABORT(rc); + return written; +abort: + return -1; +} + +/* + * Extract a list of chunks from the pattern group described by start and stop. + * + * For instance, the chunks for [a-z0-9] or [!a-z0-9] are ['a', 'z0', '9']. + * + * See translate_expression() for its usage. 
+ */ +static PyObject * +split_expression(fnmatchmodule_state *state, + PyObject *pattern, Py_ssize_t start, Py_ssize_t stop, + PyObject *str_find_func) +{ + // ---- decl local objects ------------------------------------------------ + PyObject *chunks = NULL, *maxind = NULL; + PyObject *hyphen = state->hyphen_str; + // ---- def local objects ------------------------------------------------- + chunks = PyList_New(0); + CHECK_NOT_NULL_OR_ABORT(chunks); + maxind = PyLong_FromSsize_t(stop); + CHECK_NOT_NULL_OR_ABORT(maxind); + // ---- def local macros -------------------------------------------------- + /* add pattern[START:STOP] to the list of chunks */ +#define ADD_CHUNK(START, STOP) \ + do { \ + PyObject *chunk = PyUnicode_Substring(pattern, (START), (STOP)); \ + CHECK_NOT_NULL_OR_ABORT(chunk); \ + int rc = PyList_Append(chunks, chunk); \ + Py_DECREF(chunk); \ + CHECK_RET_CODE_OR_ABORT(rc); \ + } while (0) + // ------------------------------------------------------------------------ + Py_ssize_t chunk_start = start; + bool is_complement = PyUnicode_READ_CHAR(pattern, start) == '!'; + // skip '!' character (it is handled separately in write_expression()) + Py_ssize_t ind = is_complement ? 
start + 2 : start + 1; + while (ind < stop) { + PyObject *p_chunk_stop = PyObject_CallFunction(str_find_func, "OnO", + hyphen, ind, maxind); + CHECK_NOT_NULL_OR_ABORT(p_chunk_stop); + Py_ssize_t chunk_stop = PyLong_AsSsize_t(p_chunk_stop); + Py_DECREF(p_chunk_stop); + if (chunk_stop < 0) { + if (PyErr_Occurred()) { + goto abort; + } + // -1 here means that '-' was not found + assert(chunk_stop == -1); + break; + } + ADD_CHUNK(chunk_start, chunk_stop); + chunk_start = chunk_stop + 1; // jump after '-' + ind = chunk_stop + 3; // ensure a non-empty next chunk + } + if (chunk_start < stop) { + ADD_CHUNK(chunk_start, stop); + } + else { + Py_ssize_t chunkscount = PyList_GET_SIZE(chunks); + assert(chunkscount > 0); + PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1); + assert(chunk != NULL); + PyObject *str = PyUnicode_Concat(chunk, hyphen); + // PyList_SetItem() does not create a new reference on 'str' + // so we should not decref 'str' after the call, unless there + // is an issue while setting the item. + if (str == NULL || PyList_SetItem(chunks, chunkscount - 1, str) < 0) { + Py_XDECREF(str); + goto abort; + } + } +#undef ADD_CHUNK + Py_DECREF(maxind); + return chunks; +abort: + Py_XDECREF(maxind); + Py_XDECREF(chunks); + return NULL; +} + +/* + * Remove empty ranges (they are invalid in RE). + * + * See translate_expression() for its usage. 
+ */ +static int +simplify_expression(PyObject *chunks) +{ + // for k in range(len(chunks) - 1, 0, -1): + for (Py_ssize_t k = PyList_GET_SIZE(chunks) - 1; k > 0; --k) { + PyObject *c1 = PyList_GET_ITEM(chunks, k - 1); + assert(c1 != NULL); + Py_ssize_t c1len = PyUnicode_GET_LENGTH(c1); + assert(c1len > 0); + + PyObject *c2 = PyList_GET_ITEM(chunks, k); + assert(c2 != NULL); + Py_ssize_t c2len = PyUnicode_GET_LENGTH(c2); + assert(c2len > 0); + + if (PyUnicode_READ_CHAR(c1, c1len - 1) > PyUnicode_READ_CHAR(c2, 0)) { + Py_ssize_t olen = c1len + c2len - 2; + assert(olen >= 0); + PyObject *str = NULL; + if (olen == 0) { // c1[:1] + c2[1:] == '' + str = Py_GetConstant(Py_CONSTANT_EMPTY_STR); + assert(_Py_IsImmortal(str)); + } + else if (c1len == 1) { // c1[:1] + c2[1:] == c2[1:] + assert(c2len > 1); + str = PyUnicode_Substring(c2, 1, c2len); + } + else if (c2len == 1) { // c1[:1] + c2[1:] == c1[:1] + assert(c1len > 1); + str = PyUnicode_Substring(c1, 0, c1len - 1); + } + else { + assert(c1len > 1); + assert(c2len > 1); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen); + CHECK_NOT_NULL_OR_ABORT(writer); + // all but the last character in the first chunk + if (_WRITE_SUBSTRING(writer, c1, 0, c1len - 1) < 0) { + PyUnicodeWriter_Discard(writer); + goto abort; + } + // all but the first character in the second chunk + if (_WRITE_SUBSTRING(writer, c2, 1, c2len) < 0) { + PyUnicodeWriter_Discard(writer); + goto abort; + } + str = PyUnicodeWriter_Finish(writer); + } + // PyList_SetItem() does not create a new reference on 'str' + // so we should not decref 'str' after the call, unless there + // is an issue while setting the item. + if (str == NULL || PyList_SetItem(chunks, k - 1, str) < 0) { + Py_XDECREF(str); + goto abort; + } + CHECK_RET_CODE_OR_ABORT(PySequence_DelItem(chunks, k)); + } + } + return 0; +abort: + return -1; +} + +/* + * Escape backslashes and hyphens for set difference (--), + * but hyphens that create ranges should not be escaped. 
+ *
+ * See translate_expression() for its usage.
+ *
+ * Every item of 'chunks' is replaced in place by its escaped version.
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+escape_expression(fnmatchmodule_state *state, PyObject *chunks)
+{
+    for (Py_ssize_t c = 0; c < PyList_GET_SIZE(chunks); ++c) {
+        PyObject *s0 = PyList_GET_ITEM(chunks, c);  // borrowed reference
+        assert(s0 != NULL);
+        // NOTE(review): BACKSLASH_REPLACE() and HYPHEN_REPLACE() are
+        // defined elsewhere; they are used here as returning a new
+        // reference, or NULL on error.
+        PyObject *s1 = BACKSLASH_REPLACE(state, s0);
+        CHECK_NOT_NULL_OR_ABORT(s1);
+        PyObject *s2 = HYPHEN_REPLACE(state, s1);
+        Py_DECREF(s1);
+        // PyList_SetItem() steals the reference to 's2', even when it
+        // fails (it releases the item by itself in that case). Hence
+        // 's2' must never be decref'ed after the call: doing so on
+        // failure would be a double decref.
+        if (s2 == NULL || PyList_SetItem(chunks, c, s2) < 0) {
+            goto abort;
+        }
+    }
+    return 0;
+abort:
+    return -1;
+}
+
+/*
+ * Translate a bracket expression: split it into chunks, remove the
+ * empty ranges, escape the chunks and join them back with
+ * 'state->hyphen_str'.
+ *
+ * The arguments are forwarded as is to split_expression().
+ *
+ * Returns a new string, or NULL on failure.
+ */
+static PyObject *
+translate_expression(fnmatchmodule_state *state,
+                     PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+                     PyObject *pattern_str_find_meth)
+{
+    PyObject *chunks = split_expression(state, pattern, start, stop,
+                                        pattern_str_find_meth);
+    CHECK_NOT_NULL_OR_ABORT(chunks);
+    // remove empty ranges
+    CHECK_RET_CODE_OR_ABORT(simplify_expression(chunks));
+    // escape backslashes and set differences
+    CHECK_RET_CODE_OR_ABORT(escape_expression(state, chunks));
+    PyObject *res = PyUnicode_Join(state->hyphen_str, chunks);
+    Py_DECREF(chunks);
+    return res;
+abort:
+    Py_XDECREF(chunks);
+    return NULL;
+}
+
+/*
+ * Write the RE counterpart of a translated bracket expression.
+ *
+ * Parameters
+ *
+ *      state       The module state (holds the set operations helpers).
+ *      writer      The writer receiving the RE fragment.
+ *      expression  A Unicode object, without the enclosing brackets.
+ *
+ * Returns the number of characters written, or -1 on failure.
+ */
+static Py_ssize_t
+write_expression(fnmatchmodule_state *state,
+                 PyUnicodeWriter *writer, PyObject *expression)
+{
+    PyObject *safe_expression = NULL; // for the 'goto abort' statements
+    Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
+    if (grouplen == 0) {
+        // empty range: never match
+        WRITE_ASCII_OR_ABORT(writer, "(?!)", 4);
+        return 4;
+    }
+    Py_UCS4 token = PyUnicode_READ_CHAR(expression, 0);
+    if (grouplen == 1 && token == '!') {
+        // negated empty range: match any character
+        WRITE_CHAR_OR_ABORT(writer, '.');
+        return 1;
+    }
+    Py_ssize_t extra = 2; // '[' and ']'
+    WRITE_CHAR_OR_ABORT(writer, '[');
+    // escape set operations as late as possible
+    safe_expression = SETOPS_REPLACE(state, expression, state->setops_re_subfn);
+    CHECK_NOT_NULL_OR_ABORT(safe_expression);
+    // Escaping may insert backslashes, so the escaped expression can be
+    // longer than 'expression': use its own length (and not 'grouplen')
+    // for what is written and reported below.
+    Py_ssize_t safelen = PyUnicode_GET_LENGTH(safe_expression);
+    switch (token) {
+        case '!': {
+            WRITE_CHAR_OR_ABORT(writer, '^'); // replace '!' by '^'
+            WRITE_SUBSTRING_OR_ABORT(writer, safe_expression, 1, safelen);
+            break;
+        }
+        case '^':
+        case '[': {
+            WRITE_CHAR_OR_ABORT(writer, '\\');
+            ++extra; // because we wrote '\\'
+            WRITE_STRING_OR_ABORT(writer, safe_expression);
+            break;
+        }
+        default: {
+            WRITE_STRING_OR_ABORT(writer, safe_expression);
+            break;
+        }
+    }
+    // Py_CLEAR() resets the pointer to NULL so that the 'abort' path,
+    // still reachable from the write below, does not decref it twice.
+    Py_CLEAR(safe_expression);
+    WRITE_CHAR_OR_ABORT(writer, ']');
+    return safelen + extra;
+abort:
+    Py_XDECREF(safe_expression);
+    return -1;
+}
+
+/*
+ * Build the final RE from a partially translated 'pattern' in which the
+ * characters located at 'indices' (a list of non-negative integers) are
+ * wildcards that still need to be expanded.
+ *
+ * Returns a new string, or NULL on failure.
+ */
+static PyObject *
+process_wildcards(PyObject *pattern, PyObject *indices)
+{
+    const Py_ssize_t n = PyUnicode_GET_LENGTH(pattern);
+    const Py_ssize_t m = PyList_GET_SIZE(indices);
+    // Let m = len(indices) and n = len(pattern). By construction,
+    //
+    //      pattern = [PREFIX] [[(* INNER) ... (* INNER)] (*) [OUTER]]
+    //
+    // where [...] is an optional group and (...) is a required group.
+    //
+    // The algorithm is as follows:
+    //
+    // - Write "(?s:".
+    // - Write the optional PREFIX.
+    // - Write an INNER group (* INNER) as "(?>.*?" + INNER + ")".
+    // - Write ".*" instead of the last wildcard.
+    // - Write an optional OUTER string normally.
+    // - Write ")\\Z".
+    //
+    // If m = 0, the writer needs n + 7 characters. Otherwise, it requires
+    // exactly n + 6(m-1) + 1 + 7 = n + 6m + 2 characters, where the "+1"
+    // is due to the fact that writing ".*" instead of "*" only increases
+    // the total length of the pattern by 1 (and not by 2).
+    const Py_ssize_t reslen = m == 0 ? (n + 7) : (n + 6 * m + 2);
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(reslen);
+    if (writer == NULL) {
+        return NULL;
+    }
+    // ---- def local macros --------------------------------------------------
+#define LOAD_WILDCARD_INDEX(VAR, IND)                               \
+    do {                                                            \
+        VAR = PyLong_AsSsize_t(PyList_GET_ITEM(indices, (IND)));    \
+        /* wildcard indices must be >= 0 */                         \
+        CHECK_UNSIGNED_INT_OR_ABORT(VAR);                           \
+    } while (0)
+    // ------------------------------------------------------------------------
+    WRITE_ASCII_OR_ABORT(writer, "(?s:", 4);
+    if (m == 0) {
+        WRITE_STRING_OR_ABORT(writer, pattern);
+    }
+    else {
+        // 'i' is the start of the part to copy, 'j' the next wildcard
+        Py_ssize_t i = 0, j = -1;
+        // process the optional PREFIX
+        LOAD_WILDCARD_INDEX(j, 0);
+        WRITE_SUBSTRING_OR_ABORT(writer, pattern, i, j);
+        i = j + 1;
+        for (Py_ssize_t k = 1; k < m; ++k) {
+            // process the (* INNER) groups
+            LOAD_WILDCARD_INDEX(j, k);
+            assert(i < j);
+            // write the atomic RE group '(?>.*?' + INNER + ')'
+            WRITE_ASCII_OR_ABORT(writer, "(?>.*?", 6);
+            WRITE_SUBSTRING_OR_ABORT(writer, pattern, i, j);
+            WRITE_CHAR_OR_ABORT(writer, ')');
+            i = j + 1;
+        }
+        // handle the (*) [OUTER] part
+        WRITE_ASCII_OR_ABORT(writer, ".*", 2);
+        WRITE_SUBSTRING_OR_ABORT(writer, pattern, i, n);
+    }
+    WRITE_ASCII_OR_ABORT(writer, ")\\Z", 3);
+    PyObject *res = PyUnicodeWriter_Finish(writer);
+    assert(res == NULL || PyUnicode_GET_LENGTH(res) == reslen);
+    return res;
+abort:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+#undef LOAD_WILDCARD_INDEX
+}
diff --git a/Modules/_fnmatch/util.h b/Modules/_fnmatch/util.h
new file mode 100644
index 00000000000000..276921328dd868
--- /dev/null
+++ b/Modules/_fnmatch/util.h
@@ -0,0 +1,70 @@
+/*
+ * This file contains helper prototypes and structures.
+ */ + +#ifndef _FNMATCH_UTIL_H +#define _FNMATCH_UTIL_H + +#include "Python.h" + +typedef struct { + PyObject *os_module; // import os + PyObject *re_module; // import re + + PyObject *translator; // LRU-cached translation unit + PyObject *re_escape; // LRU-cached re.escape() function + + // strings used by translate.c + PyObject *hyphen_str; // hyphen '-' + PyObject *hyphen_esc_str; // escaped hyphen '\\-' + + PyObject *backslash_str; // backslash '\\' + PyObject *backslash_esc_str; // escaped backslash '\\\\' + + /* set operation tokens (&&, ~~ and ||) are not supported in regex */ + PyObject *setops_re_subfn; // cached re.compile('([&~|])').sub() + PyObject *setops_repl_str; // replacement pattern '\\\\\\1' +} fnmatchmodule_state; + +static inline fnmatchmodule_state * +get_fnmatchmodule_state(PyObject *module) +{ + void *state = PyModule_GetState(module); + assert(state != NULL); + return (fnmatchmodule_state *)state; +} + +// ==== Helper prototypes ===================================================== + +/* + * Returns a list of matched names, or NULL if an error occurred. + * + * Parameters + * + * matcher A reference to the 'match()' method of a compiled pattern. + * names An iterable of strings (str or bytes objects) to match. + * normcase A reference to os.path.normcase(). + * + * This is equivalent to: + * + * [name for name in names if matcher(normcase(name))] + */ +extern PyObject * +_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normcase); + +/* + * C accelerator for translating UNIX shell patterns into RE patterns. + * + * Parameters + * + * module A module with a state given by get_fnmatchmodule_state(). + * pattern A Unicode object to translate. + * + * Returns + * + * A translated unicode RE pattern. 
+ */ +extern PyObject * +_Py_fnmatch_translate(PyObject *module, PyObject *pattern); + +#endif // _FNMATCH_UTIL_H diff --git a/Modules/clinic/_fnmatchmodule.c.h b/Modules/clinic/_fnmatchmodule.c.h new file mode 100644 index 00000000000000..4b12f33113d3fb --- /dev/null +++ b/Modules/clinic/_fnmatchmodule.c.h @@ -0,0 +1,185 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_fnmatch_filter__doc__, +"filter($module, /, names, pat)\n" +"--\n" +"\n"); + +#define _FNMATCH_FILTER_METHODDEF \ + {"filter", _PyCFunction_CAST(_fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, _fnmatch_filter__doc__}, + +static PyObject * +_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat); + +static PyObject * +_fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(names), &_Py_ID(pat), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"names", "pat", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "filter", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *names; + PyObject *pat; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf); + if (!args) { + goto exit; + } + names = args[0]; + pat = args[1]; + return_value = 
_fnmatch_filter_impl(module, names, pat); + +exit: + return return_value; +} + +PyDoc_STRVAR(_fnmatch_fnmatchcase__doc__, +"fnmatchcase($module, /, name, pat)\n" +"--\n" +"\n" +"Test whether `name` matches `pattern`, including case.\n" +"\n" +"This is a version of fnmatch() which doesn\'t case-normalize\n" +"its arguments."); + +#define _FNMATCH_FNMATCHCASE_METHODDEF \ + {"fnmatchcase", _PyCFunction_CAST(_fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatchcase__doc__}, + +static int +_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat); + +static PyObject * +_fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(name), &_Py_ID(pat), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"name", "pat", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "fnmatchcase", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *name; + PyObject *pat; + int _return_value; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf); + if (!args) { + goto exit; + } + name = args[0]; + pat = args[1]; + _return_value = _fnmatch_fnmatchcase_impl(module, name, pat); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyBool_FromLong((long)_return_value); + +exit: + return return_value; +} + +PyDoc_STRVAR(_fnmatch_translate__doc__, +"translate($module, /, pat)\n" +"--\n" +"\n"); + +#define _FNMATCH_TRANSLATE_METHODDEF 
\ + {"translate", _PyCFunction_CAST(_fnmatch_translate), METH_FASTCALL|METH_KEYWORDS, _fnmatch_translate__doc__}, + +static PyObject * +_fnmatch_translate_impl(PyObject *module, PyObject *pattern); + +static PyObject * +_fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(pat), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pat", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "translate", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *pattern; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + pattern = args[0]; + return_value = _fnmatch_translate_impl(module, pattern); + +exit: + return return_value; +} +/*[clinic end generated code: output=b0366b259b101bdf input=a9049054013a1b77]*/ diff --git a/PC/config.c b/PC/config.c index b744f711b0d636..7c7c2540118cf2 100644 --- a/PC/config.c +++ b/PC/config.c @@ -10,6 +10,7 @@ extern PyObject* PyInit_array(void); extern PyObject* PyInit_binascii(void); extern PyObject* PyInit_cmath(void); extern PyObject* PyInit_errno(void); +extern PyObject* PyInit__fnmatch(void); extern PyObject* PyInit_faulthandler(void); extern PyObject* PyInit__tracemalloc(void); extern PyObject* PyInit_gc(void); @@ -91,6 +92,7 @@ struct _inittab _PyImport_Inittab[] = { {"binascii", PyInit_binascii}, {"cmath", PyInit_cmath}, {"errno", PyInit_errno}, + {"_fnmatch", PyInit__fnmatch}, 
{"faulthandler", PyInit_faulthandler}, {"gc", PyInit_gc}, {"math", PyInit_math}, diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 9e3af689f4a288..2083072f6cf8cf 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -369,6 +369,8 @@ + + @@ -474,6 +476,9 @@ + + + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 31f7971bda845d..301030d50b5733 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -252,6 +252,12 @@ Modules + + Modules\_fnmatch + + + Modules\_fnmatch + Modules\_io @@ -1061,6 +1067,15 @@ Modules + + Modules\_fnmatch + + + Modules\_fnmatch + + + Modules\_fnmatch + Modules\_io diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index 4d595d98445a05..269c7b34e9d477 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -32,6 +32,7 @@ static const char* _Py_stdlib_module_names[] = { "_dbm", "_decimal", "_elementtree", +"_fnmatch", "_frozen_importlib", "_frozen_importlib_external", "_functools", diff --git a/configure b/configure index 52988f77f6d926..60ebd04f3735d7 100755 --- a/configure +++ b/configure @@ -801,6 +801,8 @@ MODULE__JSON_FALSE MODULE__JSON_TRUE MODULE__HEAPQ_FALSE MODULE__HEAPQ_TRUE +MODULE__FNMATCH_FALSE +MODULE__FNMATCH_TRUE MODULE__CSV_FALSE MODULE__CSV_TRUE MODULE__CONTEXTVARS_FALSE @@ -27749,6 +27751,7 @@ SRCDIRS="\ Modules/_ctypes \ Modules/_decimal \ Modules/_decimal/libmpdec \ + Modules/_fnmatch \ Modules/_hacl \ Modules/_io \ Modules/_multiprocessing \ @@ -29233,6 +29236,28 @@ then : +fi + + + if test "$py_cv_module__fnmatch" != "n/a" +then : + py_cv_module__fnmatch=yes +fi + if test "$py_cv_module__fnmatch" = yes; then + MODULE__FNMATCH_TRUE= + MODULE__FNMATCH_FALSE='#' +else + MODULE__FNMATCH_TRUE='#' + MODULE__FNMATCH_FALSE= +fi + + as_fn_append MODULE_BLOCK "MODULE__FNMATCH_STATE=$py_cv_module__fnmatch$as_nl" + if test "x$py_cv_module__fnmatch" = xyes +then : + 
+ as_fn_append MODULE_BLOCK "MODULE__FNMATCH_CFLAGS=-I\$(srcdir)/Modules/_fnmatch$as_nl" + + fi @@ -31818,6 +31843,10 @@ if test -z "${MODULE__CSV_TRUE}" && test -z "${MODULE__CSV_FALSE}"; then as_fn_error $? "conditional \"MODULE__CSV\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${MODULE__FNMATCH_TRUE}" && test -z "${MODULE__FNMATCH_FALSE}"; then + as_fn_error $? "conditional \"MODULE__FNMATCH\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${MODULE__HEAPQ_TRUE}" && test -z "${MODULE__HEAPQ_FALSE}"; then as_fn_error $? "conditional \"MODULE__HEAPQ\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff --git a/configure.ac b/configure.ac index 5bde6803cd5a7b..2183ae2a590daa 100644 --- a/configure.ac +++ b/configure.ac @@ -7009,6 +7009,7 @@ SRCDIRS="\ Modules/_ctypes \ Modules/_decimal \ Modules/_decimal/libmpdec \ + Modules/_fnmatch \ Modules/_hacl \ Modules/_io \ Modules/_multiprocessing \ @@ -7694,6 +7695,7 @@ PY_STDLIB_MOD_SIMPLE([_asyncio]) PY_STDLIB_MOD_SIMPLE([_bisect]) PY_STDLIB_MOD_SIMPLE([_contextvars]) PY_STDLIB_MOD_SIMPLE([_csv]) +PY_STDLIB_MOD_SIMPLE([_fnmatch], [-I\$(srcdir)/Modules/_fnmatch], []) PY_STDLIB_MOD_SIMPLE([_heapq]) PY_STDLIB_MOD_SIMPLE([_json]) PY_STDLIB_MOD_SIMPLE([_lsprof])