diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index d9b46df507dfd7..661490cd73c00d 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -862,6 +862,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(col_offset));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(command));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(comment_factory));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compile));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compile_mode));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(consts));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(context));
@@ -915,6 +916,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(entrypoint));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(env));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(errors));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(escape));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type));
@@ -943,6 +945,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filter));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filters));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(final));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(find));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(find_class));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fix_imports));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags));
@@ -1086,6 +1089,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_unnamed_fields));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(names));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(narg));
@@ -1099,6 +1103,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nlocals));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_depth));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_offset));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(normcase));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ns));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nstype));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nt));
@@ -1128,6 +1133,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(password));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pat));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(path));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pattern));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(peek));
@@ -1225,6 +1231,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict_mode));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(string));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sub));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sub_key));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 10773d7a6c7e3f..2882390f9780c2 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -351,6 +351,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(col_offset)
STRUCT_FOR_ID(command)
STRUCT_FOR_ID(comment_factory)
+ STRUCT_FOR_ID(compile)
STRUCT_FOR_ID(compile_mode)
STRUCT_FOR_ID(consts)
STRUCT_FOR_ID(context)
@@ -404,6 +405,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(entrypoint)
STRUCT_FOR_ID(env)
STRUCT_FOR_ID(errors)
+ STRUCT_FOR_ID(escape)
STRUCT_FOR_ID(event)
STRUCT_FOR_ID(eventmask)
STRUCT_FOR_ID(exc_type)
@@ -432,6 +434,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(filter)
STRUCT_FOR_ID(filters)
STRUCT_FOR_ID(final)
+ STRUCT_FOR_ID(find)
STRUCT_FOR_ID(find_class)
STRUCT_FOR_ID(fix_imports)
STRUCT_FOR_ID(flags)
@@ -575,6 +578,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(n_unnamed_fields)
STRUCT_FOR_ID(name)
STRUCT_FOR_ID(name_from)
+ STRUCT_FOR_ID(names)
STRUCT_FOR_ID(namespace_separator)
STRUCT_FOR_ID(namespaces)
STRUCT_FOR_ID(narg)
@@ -588,6 +592,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(nlocals)
STRUCT_FOR_ID(node_depth)
STRUCT_FOR_ID(node_offset)
+ STRUCT_FOR_ID(normcase)
STRUCT_FOR_ID(ns)
STRUCT_FOR_ID(nstype)
STRUCT_FOR_ID(nt)
@@ -617,6 +622,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(pages)
STRUCT_FOR_ID(parent)
STRUCT_FOR_ID(password)
+ STRUCT_FOR_ID(pat)
STRUCT_FOR_ID(path)
STRUCT_FOR_ID(pattern)
STRUCT_FOR_ID(peek)
@@ -714,6 +720,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(strict)
STRUCT_FOR_ID(strict_mode)
STRUCT_FOR_ID(string)
+ STRUCT_FOR_ID(sub)
STRUCT_FOR_ID(sub_key)
STRUCT_FOR_ID(symmetric_difference_update)
STRUCT_FOR_ID(tabsize)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 618f8d0a36b6c3..ecd624e4bca02a 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -860,6 +860,7 @@ extern "C" {
INIT_ID(col_offset), \
INIT_ID(command), \
INIT_ID(comment_factory), \
+ INIT_ID(compile), \
INIT_ID(compile_mode), \
INIT_ID(consts), \
INIT_ID(context), \
@@ -913,6 +914,7 @@ extern "C" {
INIT_ID(entrypoint), \
INIT_ID(env), \
INIT_ID(errors), \
+ INIT_ID(escape), \
INIT_ID(event), \
INIT_ID(eventmask), \
INIT_ID(exc_type), \
@@ -941,6 +943,7 @@ extern "C" {
INIT_ID(filter), \
INIT_ID(filters), \
INIT_ID(final), \
+ INIT_ID(find), \
INIT_ID(find_class), \
INIT_ID(fix_imports), \
INIT_ID(flags), \
@@ -1084,6 +1087,7 @@ extern "C" {
INIT_ID(n_unnamed_fields), \
INIT_ID(name), \
INIT_ID(name_from), \
+ INIT_ID(names), \
INIT_ID(namespace_separator), \
INIT_ID(namespaces), \
INIT_ID(narg), \
@@ -1097,6 +1101,7 @@ extern "C" {
INIT_ID(nlocals), \
INIT_ID(node_depth), \
INIT_ID(node_offset), \
+ INIT_ID(normcase), \
INIT_ID(ns), \
INIT_ID(nstype), \
INIT_ID(nt), \
@@ -1126,6 +1131,7 @@ extern "C" {
INIT_ID(pages), \
INIT_ID(parent), \
INIT_ID(password), \
+ INIT_ID(pat), \
INIT_ID(path), \
INIT_ID(pattern), \
INIT_ID(peek), \
@@ -1223,6 +1229,7 @@ extern "C" {
INIT_ID(strict), \
INIT_ID(strict_mode), \
INIT_ID(string), \
+ INIT_ID(sub), \
INIT_ID(sub_key), \
INIT_ID(symmetric_difference_update), \
INIT_ID(tabsize), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index f848a002c3b5d1..e114fca09aefe8 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1204,6 +1204,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(compile);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(compile_mode);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -1416,6 +1420,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(escape);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(event);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -1528,6 +1536,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(find);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(find_class);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2100,6 +2112,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(names);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(namespace_separator);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2152,6 +2168,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(normcase);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(ns);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2268,6 +2288,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(pat);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(path);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2656,6 +2680,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(sub);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(sub_key);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 73acb1fe8d4106..1dc52f2575ae6c 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -16,24 +16,72 @@
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
-def fnmatch(name, pat):
- """Test whether FILENAME matches PATTERN.
-
- Patterns are Unix shell style:
-
- * matches everything
- ? matches any single character
- [seq] matches any character in seq
- [!seq] matches any char not in seq
-
- An initial period in FILENAME is not special.
- Both FILENAME and PATTERN are first case-normalized
- if the operating system requires it.
- If you don't want this, use fnmatchcase(FILENAME, PATTERN).
- """
- name = os.path.normcase(name)
- pat = os.path.normcase(pat)
- return fnmatchcase(name, pat)
+# Prefer the C implementation from _fnmatch; the pure Python version below
+# is only defined when that extension module cannot be imported.
+try:
+    from _fnmatch import filter
+except ImportError:
+    def filter(names, pat):
+        """Construct a list from the names in *names* matching *pat*."""
+        result = []
+        # The pattern is case-normalized once; the matcher is looked up via
+        # the module-level _compile_pattern() helper.
+        pat = os.path.normcase(pat)
+        match = _compile_pattern(pat)
+        if os.path is posixpath:
+            # normcase on posix is NOP. Optimize it away from the loop.
+            for name in names:
+                if match(name):
+                    result.append(name)
+        else:
+            # Match against the normalized name but keep the original name
+            # in the result.
+            for name in names:
+                if match(os.path.normcase(name)):
+                    result.append(name)
+        return result
+
+# Prefer the C implementation from _fnmatch; the pure Python version below
+# is only defined when that extension module cannot be imported.
+try:
+    from _fnmatch import fnmatch
+except ImportError:
+    def fnmatch(name, pat):
+        """Test whether *name* matches *pat*.
+
+        Patterns are Unix shell style:
+
+        *       matches everything
+        ?       matches any single character
+        [seq]   matches any character in seq
+        [!seq]  matches any char not in seq
+
+        An initial period in *name* is not special.
+        Both *name* and *pat* are first case-normalized
+        if the operating system requires it.
+
+        If you don't want this, use fnmatchcase(name, pat).
+        """
+        # Normalize both operands, then defer to the case-sensitive matcher.
+        name = os.path.normcase(name)
+        pat = os.path.normcase(pat)
+        return fnmatchcase(name, pat)
+
+# Prefer the C implementation from _fnmatch; the pure Python version below
+# is only defined when that extension module cannot be imported.
+try:
+    from _fnmatch import fnmatchcase
+except ImportError:
+    def fnmatchcase(name, pat):
+        """Test whether *name* matches *pat*, including case.
+
+        This is a version of fnmatch() which doesn't case-normalize
+        its arguments.
+        """
+        match = _compile_pattern(pat)
+        # Convert the matcher's result into a strict bool.
+        return match(name) is not None
+
+# Prefer the C implementation from _fnmatch; the pure Python version below
+# is only defined when that extension module cannot be imported.
+try:
+    from _fnmatch import translate
+except ImportError:
+    def translate(pat):
+        """Translate a shell pattern *pat* to a regular expression.
+
+        There is no way to quote meta-characters.
+        """
+
+        # A fresh object() is passed as the STAR marker to both helpers;
+        # presumably it stands for '*' wildcards in the parts list
+        # (see _translate() and _join_translated_parts()).
+        STAR = object()
+        parts = _translate(pat, STAR, '.')
+        return _join_translated_parts(parts, STAR)
@functools.lru_cache(maxsize=32768, typed=True)
def _compile_pattern(pat):
@@ -45,43 +93,6 @@ def _compile_pattern(pat):
res = translate(pat)
return re.compile(res).match
-def filter(names, pat):
- """Construct a list from those elements of the iterable NAMES that match PAT."""
- result = []
- pat = os.path.normcase(pat)
- match = _compile_pattern(pat)
- if os.path is posixpath:
- # normcase on posix is NOP. Optimize it away from the loop.
- for name in names:
- if match(name):
- result.append(name)
- else:
- for name in names:
- if match(os.path.normcase(name)):
- result.append(name)
- return result
-
-def fnmatchcase(name, pat):
- """Test whether FILENAME matches PATTERN, including case.
-
- This is a version of fnmatch() which doesn't case-normalize
- its arguments.
- """
- match = _compile_pattern(pat)
- return match(name) is not None
-
-
-def translate(pat):
- """Translate a shell PATTERN to a regular expression.
-
- There is no way to quote meta-characters.
- """
-
- STAR = object()
- parts = _translate(pat, STAR, '.')
- return _join_translated_parts(parts, STAR)
-
-
def _translate(pat, STAR, QUESTION_MARK):
res = []
add = res.append
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 10ed496d4e2f37..6ab244021ea20d 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -1,23 +1,29 @@
"""Test cases for the fnmatch module."""
-import unittest
+import itertools
import os
import string
+import unittest
import warnings
-from fnmatch import fnmatch, fnmatchcase, translate, filter
+import test.support.import_helper
+
+c_fnmatch = test.support.import_helper.import_fresh_module("_fnmatch")
+py_fnmatch = test.support.import_helper.import_fresh_module("fnmatch", blocked=["_fnmatch"])
-class FnmatchTestCase(unittest.TestCase):
+class FnmatchTestCaseMixin:
+ fnmatch = None
- def check_match(self, filename, pattern, should_match=True, fn=fnmatch):
- if should_match:
- self.assertTrue(fn(filename, pattern),
- "expected %r to match pattern %r"
- % (filename, pattern))
- else:
- self.assertFalse(fn(filename, pattern),
- "expected %r not to match pattern %r"
- % (filename, pattern))
+ def check_match(self, filename, pattern, should_match=True, func=None):
+ if func is None:
+ func = self.fnmatch.fnmatch
+
+ with self.subTest(fn=func, name=filename, pattern=pattern):
+ res = func(filename, pattern)
+ if should_match:
+ self.assertTrue(res, f"expected {filename!r} to match pattern {pattern!r}")
+ else:
+ self.assertFalse(res, f"expected {filename!r} not to match pattern {pattern!r}")
def test_fnmatch(self):
check = self.check_match
@@ -54,13 +60,17 @@ def test_slow_fnmatch(self):
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
def test_mix_bytes_str(self):
+ fnmatch = self.fnmatch.fnmatch
self.assertRaises(TypeError, fnmatch, 'test', b'*')
self.assertRaises(TypeError, fnmatch, b'test', '*')
+
+ fnmatchcase = self.fnmatch.fnmatchcase
self.assertRaises(TypeError, fnmatchcase, 'test', b'*')
self.assertRaises(TypeError, fnmatchcase, b'test', '*')
def test_fnmatchcase(self):
check = self.check_match
+ fnmatchcase = self.fnmatch.fnmatchcase
check('abc', 'abc', True, fnmatchcase)
check('AbC', 'abc', False, fnmatchcase)
check('abc', 'AbC', False, fnmatchcase)
@@ -216,11 +226,18 @@ def test_warnings(self):
check(',', '[a-z+--A-Z]')
check('.', '[a-z--/A-Z]')
+class PurePythonFnmatchTestCase(FnmatchTestCaseMixin, unittest.TestCase):
+ fnmatch = py_fnmatch
+
+class CPythonFnmatchTestCase(FnmatchTestCaseMixin, unittest.TestCase):
+ fnmatch = c_fnmatch
-class TranslateTestCase(unittest.TestCase):
+class TranslateTestCaseMixin:
+ fnmatch = None
def test_translate(self):
import re
+ translate = self.fnmatch.translate
self.assertEqual(translate('*'), r'(?s:.*)\Z')
self.assertEqual(translate('?'), r'(?s:.)\Z')
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
@@ -250,32 +267,148 @@ def test_translate(self):
self.assertTrue(re.match(fatre, 'cbabcaxc'))
self.assertFalse(re.match(fatre, 'dabccbad'))
-class FilterTestCase(unittest.TestCase):
+ def test_translate_wildcards(self):
+ for pattern, expect in [
+ ('', r'(?s:)\Z'),
+ ('ab*', r'(?s:ab.*)\Z'),
+ ('ab*cd', r'(?s:ab.*cd)\Z'),
+ ('ab*cd*', r'(?s:ab(?>.*?cd).*)\Z'),
+ ('ab*cd*12', r'(?s:ab(?>.*?cd).*12)\Z'),
+ ('ab*cd*12*', r'(?s:ab(?>.*?cd)(?>.*?12).*)\Z'),
+ ('ab*cd*12*34', r'(?s:ab(?>.*?cd)(?>.*?12).*34)\Z'),
+ ('ab*cd*12*34*', r'(?s:ab(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
+ ]:
+ translated = self.fnmatch.translate(pattern)
+ self.assertEqual(translated, expect, pattern)
+
+ for pattern, expect in [
+ ('*ab', r'(?s:.*ab)\Z'),
+ ('*ab*', r'(?s:(?>.*?ab).*)\Z'),
+ ('*ab*cd', r'(?s:(?>.*?ab).*cd)\Z'),
+ ('*ab*cd*', r'(?s:(?>.*?ab)(?>.*?cd).*)\Z'),
+ ('*ab*cd*12', r'(?s:(?>.*?ab)(?>.*?cd).*12)\Z'),
+ ('*ab*cd*12*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*)\Z'),
+ ('*ab*cd*12*34', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*34)\Z'),
+ ('*ab*cd*12*34*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
+ ]:
+ translated = self.fnmatch.translate(pattern)
+ self.assertEqual(translated, expect, pattern)
+
+ def test_translate_expressions(self):
+ for pattern, expect in [
+ ('[', r'(?s:\[)\Z'),
+ ('[!', r'(?s:\[!)\Z'),
+ ('[]', r'(?s:\[\])\Z'),
+ ('[abc', r'(?s:\[abc)\Z'),
+ ('[!abc', r'(?s:\[!abc)\Z'),
+ ('[abc]', r'(?s:[abc])\Z'),
+ ('[!abc]', r'(?s:[^abc])\Z'),
+ # with [[
+ ('[[', r'(?s:\[\[)\Z'),
+ ('[[a', r'(?s:\[\[a)\Z'),
+ ('[[]', r'(?s:[\[])\Z'),
+ ('[[]a', r'(?s:[\[]a)\Z'),
+ ('[[]]', r'(?s:[\[]\])\Z'),
+ ('[[]a]', r'(?s:[\[]a\])\Z'),
+ ('[[a]', r'(?s:[\[a])\Z'),
+ ('[[a]]', r'(?s:[\[a]\])\Z'),
+ ('[[a]b', r'(?s:[\[a]b)\Z'),
+ # backslashes
+ ('[\\', r'(?s:\[\\)\Z'),
+ (r'[\]', r'(?s:[\\])\Z'),
+ (r'[\\]', r'(?s:[\\\\])\Z'),
+ ]:
+ translated = self.fnmatch.translate(pattern)
+ self.assertEqual(translated, expect, pattern)
+
+class PurePythonTranslateTestCase(TranslateTestCaseMixin, unittest.TestCase):
+ fnmatch = py_fnmatch
+
+class CPythonTranslateTestCase(TranslateTestCaseMixin, unittest.TestCase):
+ fnmatch = c_fnmatch
+
+ @staticmethod
+ def translate_func(pattern):
+ # Pure Python implementation of translate()
+ STAR = object()
+ parts = py_fnmatch._translate(pattern, STAR, '.')
+ return py_fnmatch._join_translated_parts(parts, STAR)
+
+ def test_translate(self):
+ # We want to check that the C implementation is EXACTLY the same
+ # as the Python implementation. For that, we will need to cover
+ # a lot of cases.
+ translate = self.fnmatch.translate
+
+ for choice in itertools.combinations_with_replacement('*?.', 5):
+ for suffix in ['', '!']:
+ pat = suffix + ''.join(choice)
+ with self.subTest(pattern=pat):
+ self.assertEqual(translate(pat), self.translate_func(pat))
+
+ for pat in [
+ '',
+ '!!a*', '!\\!a*', '!a*', '*', '**', '*******?', '*******c', '*****??', '**/',
+ '*.js', '*/man*/bash.*', '*???', '?', '?*****??', '?*****?c', '?***?****',
+ '?***?****?', '?***?****c', '?*?', '??', '???', '???*', '[!\\]',
+ '\\**', '\\*\\*', 'a*', 'a*****?c', 'a****c**?**??*****', 'a***c',
+ 'a**?**cd**?**??***k', 'a**?**cd**?**??***k**', 'a**?**cd**?**??k',
+ 'a**?**cd**?**??k***', 'a*[^c]',
+ 'a*cd**?**??k', 'a/*', 'a/**', 'a/**/b',
+ 'a/**/b/**/c', 'a/.*/c', 'a/?', 'a/??', 'a[X-]b', 'a[\\.]c',
+ 'a[\\b]c', 'a[bc', 'a\\*?/*', 'a\\*b/*',
+ 'ab[!de]', 'ab[cd]', 'ab[cd]ef', 'abc', 'b*/', 'foo*',
+ 'man/man1/bash.1'
+ ]:
+ with self.subTest(pattern=pat):
+ self.assertEqual(translate(pat), self.translate_func(pat))
+
+class FilterTestCaseMixin:
+ fnmatch = None
def test_filter(self):
+ filter = self.fnmatch.filter
self.assertEqual(filter(['Python', 'Ruby', 'Perl', 'Tcl'], 'P*'),
['Python', 'Perl'])
self.assertEqual(filter([b'Python', b'Ruby', b'Perl', b'Tcl'], b'P*'),
[b'Python', b'Perl'])
+ def test_filter_iter_errors(self):
+ class BadList:
+ def __iter__(self):
+ yield 'abc'
+ raise ValueError("nope")
+
+ with self.assertRaisesRegex(ValueError, r'^nope$'):
+ self.fnmatch.filter(BadList(), '*')
+
+
def test_mix_bytes_str(self):
+ filter = self.fnmatch.filter
self.assertRaises(TypeError, filter, ['test'], b'*')
self.assertRaises(TypeError, filter, [b'test'], '*')
def test_case(self):
ignorecase = os.path.normcase('P') == os.path.normcase('p')
+ filter = self.fnmatch.filter
self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
['Test.py', 'Test.PL'] if ignorecase else ['Test.py'])
self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
['Test.py', 'Test.PL'] if ignorecase else ['Test.PL'])
def test_sep(self):
+ filter = self.fnmatch.filter
normsep = os.path.normcase('\\') == os.path.normcase('/')
self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
['usr/bin', 'usr\\lib'] if normsep else ['usr/bin'])
self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib'])
+class PurePythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
+ fnmatch = py_fnmatch
+
+class CPythonFilterTestCase(FilterTestCaseMixin, unittest.TestCase):
+ fnmatch = c_fnmatch
if __name__ == "__main__":
unittest.main()
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 9ea7bc49be316c..497e854e10edcf 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -3139,6 +3139,7 @@ MODULE__CTYPES_TEST_DEPS=$(srcdir)/Modules/_ctypes/_ctypes_test_generated.c.h
MODULE__CTYPES_MALLOC_CLOSURE=@MODULE__CTYPES_MALLOC_CLOSURE@
MODULE__DECIMAL_DEPS=$(srcdir)/Modules/_decimal/docstrings.h @LIBMPDEC_INTERNAL@
MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@
+MODULE__FNMATCH_DEPS=$(srcdir)/Modules/_fnmatch/macros.h $(srcdir)/Modules/_fnmatch/util.h
MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h
MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h
MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_MD5.h Modules/_hacl/Hacl_Hash_MD5.c
diff --git a/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
new file mode 100644
index 00000000000000..f374f28456d65d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-12-09-24-38.gh-issue-121445.KYtNOZ.rst
@@ -0,0 +1,2 @@
+Improve the performance of :func:`fnmatch.translate` by 2x and of
+:func:`fnmatch.filter` by 1.1x. Patch by Bénédikt Tran.
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index dfc75077650df8..f33af67aa26499 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -33,6 +33,7 @@
@MODULE__BISECT_TRUE@_bisect _bisectmodule.c
@MODULE__CONTEXTVARS_TRUE@_contextvars _contextvarsmodule.c
@MODULE__CSV_TRUE@_csv _csv.c
+@MODULE__FNMATCH_TRUE@_fnmatch _fnmatch/_fnmatchmodule.c _fnmatch/filter.c _fnmatch/translate.c
@MODULE__HEAPQ_TRUE@_heapq _heapqmodule.c
@MODULE__JSON_TRUE@_json _json.c
@MODULE__LSPROF_TRUE@_lsprof _lsprof.c rotatingtree.c
diff --git a/Modules/_fnmatch/_fnmatchmodule.c b/Modules/_fnmatch/_fnmatchmodule.c
new file mode 100644
index 00000000000000..4c05fa2d5d1b68
--- /dev/null
+++ b/Modules/_fnmatch/_fnmatchmodule.c
@@ -0,0 +1,470 @@
+/*
+ * C accelerator for the 'fnmatch' module.
+ *
+ * - Case normalization uses the runtime value of os.path.normcase(),
+ * forcing us to query the attribute each time.
+ *
+ * The C implementation of fnmatch.filter() uses the same os.path.normcase()
+ * when iterating over NAMES, ignoring side-effects on os.path.normcase()
+ * that may occur when processing a NAME in NAMES.
+ *
+ * More generally, os.path.normcase() is retrieved at most once per call
+ * to fnmatch.filter() or fnmatch.fnmatch().
+ */
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "macros.h"
+#include "util.h" // prototypes
+
+#include "pycore_runtime.h" // for _Py_ID()
+
+#include "clinic/_fnmatchmodule.c.h"
+
+#define LRU_CACHE_SIZE 32768
+#define INVALID_PATTERN_TYPE "pattern must be a string or a bytes object"
+
+// ==== Cached translation unit ===============================================
+
+/*
+ * Build the matcher for a UNIX shell pattern.
+ *
+ * The pattern is translated to a regular expression with
+ * fnmatch_translate_impl(), compiled with re.compile(), and the 'match'
+ * attribute of the compiled pattern is returned (NULL on failure).
+ *
+ * This function is LRU-cached by the module itself.
+ */
+static PyObject *
+get_matcher_function_impl(PyObject *module, PyObject *pattern)
+{
+    assert(module != NULL);
+
+    // step 1: shell pattern -> regular expression source
+    PyObject *regex_source = fnmatch_translate_impl(module, pattern);
+    if (regex_source == NULL) {
+        return NULL;
+    }
+
+    // step 2: regular expression source -> compiled pattern
+    fnmatchmodule_state *state = get_fnmatchmodule_state(module);
+    PyObject *compiled = NULL;
+    PyObject *re_compile = PyObject_GetAttr(state->re_module,
+                                            &_Py_ID(compile));
+    if (re_compile != NULL) {
+        compiled = PyObject_CallOneArg(re_compile, regex_source);
+        Py_DECREF(re_compile);
+    }
+    Py_DECREF(regex_source);
+    if (compiled == NULL) {
+        return NULL;
+    }
+
+    // step 3: compiled pattern -> its 'match' method
+    PyObject *match_method = PyObject_GetAttr(compiled, &_Py_ID(match));
+    Py_DECREF(compiled);
+    return match_method;
+}
+
+/* Method definition used to expose get_matcher_function_impl() as a callable. */
+static PyMethodDef get_matcher_function_def = {
+    .ml_name = "get_matcher_function",
+    .ml_meth = get_matcher_function_impl,
+    .ml_flags = METH_O,
+    .ml_doc = NULL,
+};
+
+/*
+ * Wrap get_matcher_function_impl() with
+ * functools.lru_cache(LRU_CACHE_SIZE, True) and store the resulting
+ * callable in 'st->translator'.
+ *
+ * Return 0 on success and -1 on failure.
+ */
+static int
+fnmatchmodule_load_translator(PyObject *module, fnmatchmodule_state *st)
+{
+    // make sure that this function is called once
+    assert(st->translator == NULL);
+    PyObject *maxsize = PyLong_FromLong(LRU_CACHE_SIZE);
+    if (maxsize == NULL) {
+        return -1;
+    }
+    PyObject *cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (cache == NULL) {
+        Py_DECREF(maxsize);
+        return -1;
+    }
+    // args[0] is the scratch slot required by PY_VECTORCALL_ARGUMENTS_OFFSET;
+    // the actual call is lru_cache(maxsize, True)
+    PyObject *args[3] = {NULL, maxsize, Py_True};
+    size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;
+    PyObject *wrapper = PyObject_Vectorcall(cache, &args[1], nargsf, NULL);
+    Py_DECREF(maxsize);
+    Py_DECREF(cache);
+    if (wrapper == NULL) {
+        return -1;
+    }
+    assert(module != NULL);
+    PyObject *wrapped = PyCFunction_New(&get_matcher_function_def, module);
+    if (wrapped == NULL) {
+        // PyCFunction_New() may fail; never pass NULL to the call or to
+        // Py_DECREF() below
+        Py_DECREF(wrapper);
+        return -1;
+    }
+    // reference on 'translator' will be removed upon module cleanup
+    st->translator = PyObject_CallOneArg(wrapper, wrapped);
+    Py_DECREF(wrapped);
+    Py_DECREF(wrapper);
+    if (st->translator == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+// ==== Cached re.escape() unit ===============================================
+
+/*
+ * Create an LRU-cached function for re.escape().
+ *
+ * re.escape is wrapped with functools.lru_cache(LRU_CACHE_SIZE) and the
+ * resulting callable is stored in 'st->re_escape'. 'st->re_module' must
+ * already be set.
+ *
+ * Return 0 on success and -1 on failure.
+ */
+static int
+fnmatchmodule_load_escapefunc(PyObject *Py_UNUSED(module),
+                              fnmatchmodule_state *st)
+{
+    // make sure that this function is called once
+    assert(st->re_escape == NULL);
+    PyObject *maxsize = PyLong_FromLong(LRU_CACHE_SIZE);
+    if (maxsize == NULL) {
+        return -1;
+    }
+    PyObject *cache = _PyImport_GetModuleAttrString("functools", "lru_cache");
+    if (cache == NULL) {
+        Py_DECREF(maxsize);
+        return -1;
+    }
+    PyObject *wrapper = PyObject_CallOneArg(cache, maxsize);
+    Py_DECREF(maxsize);
+    Py_DECREF(cache);
+    if (wrapper == NULL) {
+        return -1;
+    }
+    assert(st->re_module != NULL);
+    PyObject *wrapped = PyObject_GetAttr(st->re_module, &_Py_ID(escape));
+    if (wrapped == NULL) {
+        // the attribute lookup may fail; never pass NULL to the call or to
+        // Py_DECREF() below
+        Py_DECREF(wrapper);
+        return -1;
+    }
+    // reference on 're_escape' will be removed upon module cleanup
+    st->re_escape = PyObject_CallOneArg(wrapper, wrapped);
+    Py_DECREF(wrapped);
+    Py_DECREF(wrapper);
+    if (st->re_escape == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+// ==== Cached re.sub() unit for set operation tokens =========================
+
+/*
+ * Compile the pattern '([&~|])' with re.compile() and store the 'sub'
+ * attribute of the compiled pattern in 'st->setops_re_subfn'.
+ *
+ * No functools.lru_cache() wrapper is applied by this function.
+ *
+ * Return 0 on success; the 'abort' label returns -1.
+ */
+static int
+fnmatchmodule_load_setops_re_sub(PyObject *Py_UNUSED(module),
+                                 fnmatchmodule_state *st)
+{
+    // make sure that this function is called once
+    assert(st->setops_re_subfn == NULL);
+    PyObject *pattern = PyUnicode_FromString("([&~|])");
+    // NOTE(review): CHECK_NOT_NULL_OR_ABORT() is defined in macros.h (not
+    // shown here); presumably it jumps to 'abort' when its argument is NULL.
+    CHECK_NOT_NULL_OR_ABORT(pattern);
+    PyObject *compiled = PyObject_CallMethodOneArg(st->re_module,
+                                                   &_Py_ID(compile),
+                                                   pattern);
+    // 'pattern' is no longer needed whether or not the call succeeded
+    Py_DECREF(pattern);
+    CHECK_NOT_NULL_OR_ABORT(compiled);
+    st->setops_re_subfn = PyObject_GetAttr(compiled, &_Py_ID(sub));
+    Py_DECREF(compiled);
+    CHECK_NOT_NULL_OR_ABORT(st->setops_re_subfn);
+    return 0;
+abort:
+    return -1;
+}
+
+// ==== Module data getters ===================================================
+
+/*
+ * Return the result of calling the module state's 'translator' callable
+ * on 'pattern', i.e., a reference to re.compile(pattern).match().
+ */
+static inline PyObject *
+get_matcher_function(PyObject *module, PyObject *pattern)
+{
+    fnmatchmodule_state *state = get_fnmatchmodule_state(module);
+    PyObject *cached_factory = state->translator;
+    assert(cached_factory != NULL);
+    PyObject *matcher = PyObject_CallOneArg(cached_factory, pattern);
+    return matcher;
+}
+
+/*
+ * Return a reference to os.path.normcase(), looked up through the 'os'
+ * module stored in the module state, or NULL if a lookup fails.
+ */
+static inline PyObject *
+get_platform_normcase_function(PyObject *module)
+{
+    fnmatchmodule_state *state = get_fnmatchmodule_state(module);
+    PyObject *normcase = NULL;
+    PyObject *os_path = PyObject_GetAttr(state->os_module, &_Py_ID(path));
+    if (os_path != NULL) {
+        normcase = PyObject_GetAttr(os_path, &_Py_ID(normcase));
+        Py_DECREF(os_path);
+    }
+    return normcase;
+}
+
+// ==== Module state functions ================================================
+
+/*
+ * Populate the module state: import 'os' and 're', build the cached
+ * helpers and intern the string constants stored in the state.
+ *
+ * Return 0 on success; the 'abort' label returns -1.
+ *
+ * NOTE(review): CHECK_NOT_NULL_OR_ABORT() and CHECK_RET_CODE_OR_ABORT() are
+ * defined in macros.h (not shown here); presumably they jump to 'abort'
+ * on a NULL pointer and on a failing return code respectively.
+ */
+static int
+fnmatchmodule_exec(PyObject *module)
+{
+    // ---- def local macros --------------------------------------------------
+    /* Import a named module and store it in 'STATE->ATTRIBUTE'. */
+#define IMPORT_MODULE(STATE, ATTRIBUTE, MODULE_NAME)                    \
+    do {                                                                \
+        /* make sure that the attribute is initialized once */          \
+        assert(STATE->ATTRIBUTE == NULL);                               \
+        STATE->ATTRIBUTE = PyImport_ImportModule((MODULE_NAME));        \
+        CHECK_NOT_NULL_OR_ABORT(STATE->ATTRIBUTE);                      \
+    } while (0)
+    /* Intern a literal STRING and store it in 'STATE->ATTRIBUTE'. */
+#define INTERN_STRING(STATE, ATTRIBUTE, STRING)                         \
+    do {                                                                \
+        /* make sure that the attribute is initialized once */          \
+        assert(STATE->ATTRIBUTE == NULL);                               \
+        STATE->ATTRIBUTE = PyUnicode_InternFromString((STRING));        \
+        CHECK_NOT_NULL_OR_ABORT(STATE->ATTRIBUTE);                      \
+    } while (0)
+    // ------------------------------------------------------------------------
+    fnmatchmodule_state *st = get_fnmatchmodule_state(module);
+    // 're' must be imported before the loaders below run:
+    // fnmatchmodule_load_escapefunc() asserts that 'st->re_module' is set and
+    // fnmatchmodule_load_setops_re_sub() calls re.compile() through it.
+    IMPORT_MODULE(st, os_module, "os");
+    IMPORT_MODULE(st, re_module, "re");
+    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_translator(module, st));
+    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_escapefunc(module, st));
+    INTERN_STRING(st, hyphen_str, "-");
+    INTERN_STRING(st, hyphen_esc_str, "\\-");
+    INTERN_STRING(st, backslash_str, "\\");
+    INTERN_STRING(st, backslash_esc_str, "\\\\");
+    CHECK_RET_CODE_OR_ABORT(fnmatchmodule_load_setops_re_sub(module, st));
+    INTERN_STRING(st, setops_repl_str, "\\\\\\1");
+    return 0;
+abort:
+    // members set before the failure are left in the state
+    // NOTE(review): presumably they are released later through
+    // fnmatchmodule_clear(); the module definition is not shown here.
+    return -1;
+#undef INTERN_STRING
+#undef IMPORT_MODULE
+}
+
+/*
+ * Visit every object held by the module state.
+ *
+ * Members are visited in the reverse order of their initialization in
+ * fnmatchmodule_exec().
+ */
+static int
+fnmatchmodule_traverse(PyObject *m, visitproc visit, void *arg)
+{
+    fnmatchmodule_state *st = get_fnmatchmodule_state(m);
+    Py_VISIT(st->setops_repl_str);
+    Py_VISIT(st->setops_re_subfn);
+    Py_VISIT(st->backslash_esc_str);
+    Py_VISIT(st->backslash_str);
+    Py_VISIT(st->hyphen_esc_str);
+    Py_VISIT(st->hyphen_str);
+    Py_VISIT(st->re_escape);
+    Py_VISIT(st->translator);
+    Py_VISIT(st->re_module);
+    Py_VISIT(st->os_module);
+    return 0;
+}
+
+/*
+ * Release every object held by the module state.
+ *
+ * Members are cleared in the reverse order of their initialization in
+ * fnmatchmodule_exec(). Py_CLEAR() tolerates NULL members and resets each
+ * one to NULL. Always returns 0.
+ */
+static int
+fnmatchmodule_clear(PyObject *m)
+{
+    fnmatchmodule_state *st = get_fnmatchmodule_state(m);
+    Py_CLEAR(st->setops_repl_str);
+    Py_CLEAR(st->setops_re_subfn);
+    Py_CLEAR(st->backslash_esc_str);
+    Py_CLEAR(st->backslash_str);
+    Py_CLEAR(st->hyphen_esc_str);
+    Py_CLEAR(st->hyphen_str);
+    Py_CLEAR(st->re_escape);
+    Py_CLEAR(st->translator);
+    Py_CLEAR(st->re_module);
+    Py_CLEAR(st->os_module);
+    return 0;
+}
+
+/*
+ * Free hook of the module (installed as 'm_free'): drop the references
+ * held by the module state by delegating to fnmatchmodule_clear().
+ */
+static inline void
+fnmatchmodule_free(void *m)
+{
+    PyObject *module = (PyObject *)m;
+    // the status of the clear hook is deliberately ignored
+    (void)fnmatchmodule_clear(module);
+}
+
+/*[clinic input]
+module fnmatch
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=797aa965370a9ef2]*/
+
+/*[clinic input]
+fnmatch.filter -> object
+
+ names: object
+ pat as pattern: object
+
+Construct a list from the names in *names* matching *pat*.
+
+[clinic start generated code]*/
+
+static PyObject *
+fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern)
+/*[clinic end generated code: output=1a68530a2e3cf7d0 input=7ac729daad3b1404]*/
+{
+    // Implementation of fnmatch.filter(names, pat).
+    //
+    // The pattern is case-normalized with the platform 'normcase' function,
+    // turned into a matcher, and the filtering itself is delegated to
+    // _Py_fnmatch_filter(). Returns its result, or NULL on failure.
+    PyObject *filtered = NULL;
+    PyObject *normcase = get_platform_normcase_function(module);
+    if (normcase == NULL) {
+        return NULL;
+    }
+    PyObject *norm_pattern = PyObject_CallOneArg(normcase, pattern);
+    if (norm_pattern == NULL) {
+        goto done;
+    }
+    // the matcher is looked up with the *normalized* pattern
+    PyObject *matcher = get_matcher_function(module, norm_pattern);
+    Py_DECREF(norm_pattern);
+    if (matcher == NULL) {
+        goto done;
+    }
+    filtered = _Py_fnmatch_filter(matcher, names, normcase);
+    Py_DECREF(matcher);
+done:
+    // 'normcase' is always a valid reference when this label is reached
+    Py_DECREF(normcase);
+    return filtered;
+}
+
+/*[clinic input]
+fnmatch.fnmatch -> bool
+
+ name: object
+ pat as pattern: object
+
+Test whether *name* matches *pat*.
+
+Patterns are Unix shell style:
+
+* matches everything
+? matches any single character
+[seq] matches any character in seq
+[!seq] matches any char not in seq
+
+An initial period in *name* is not special.
+Both *name* and *pat* are first case-normalized
+if the operating system requires it.
+
+If you don't want this, use fnmatchcase(name, pat).
+
+[clinic start generated code]*/
+
+static int
+fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern)
+/*[clinic end generated code: output=c9dc542e8d6933b6 input=279a4a4f2ddea6a2]*/
+{
+    // Implementation of fnmatch.fnmatch(name, pat).
+    //
+    // Both arguments go through the platform 'normcase' function and the
+    // comparison is then delegated to the fnmatchcase() implementation.
+    // Returns the value of fnmatch_fnmatchcase_impl(), or -1 on failure.
+    int result = -1;
+    PyObject *norm_pattern = NULL;
+    PyObject *normcase = get_platform_normcase_function(module);
+    if (normcase == NULL) {
+        return -1;
+    }
+    PyObject *norm_name = PyObject_CallOneArg(normcase, name);
+    if (norm_name != NULL) {
+        // only normalize the pattern if the name could be normalized
+        norm_pattern = PyObject_CallOneArg(normcase, pattern);
+    }
+    Py_DECREF(normcase);
+    if (norm_pattern != NULL) {
+        result = fnmatch_fnmatchcase_impl(module, norm_name, norm_pattern);
+        Py_DECREF(norm_pattern);
+    }
+    Py_XDECREF(norm_name);
+    return result;
+}
+
+/*[clinic input]
+fnmatch.fnmatchcase -> bool
+
+ name: object
+ pat as pattern: object
+
+Test whether *name* matches *pat*, including case.
+
+This is a version of fnmatch() which doesn't case-normalize
+its arguments.
+[clinic start generated code]*/
+
+static int
+fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern)
+/*[clinic end generated code: output=4d6b268169001876 input=91d62999c08fd55e]*/
+{
+    // Implementation of fnmatch.fnmatchcase(name, pat): no case
+    // normalization is applied to the inputs.
+    //
+    // Returns 1 if the matcher yields something else than None, 0 if it
+    // yields None, and -1 on failure.
+    PyObject *matcher = get_matcher_function(module, pattern);
+    if (matcher == NULL) {
+        return -1;
+    }
+    // A 'name' of the wrong type is reported by the matcher call itself
+    // (we evaluate 're.compile(pattern).match(name)').
+    PyObject *outcome = PyObject_CallOneArg(matcher, name);
+    Py_DECREF(matcher);
+    if (outcome == NULL) {
+        return -1;
+    }
+    const int is_match = !Py_IsNone(outcome);
+    Py_DECREF(outcome);
+    return is_match;
+}
+
+/*[clinic input]
+fnmatch.translate -> object
+
+ pat as pattern: object
+
+Translate a shell pattern *pat* to a regular expression.
+
+There is no way to quote meta-characters.
+[clinic start generated code]*/
+
+static PyObject *
+fnmatch_translate_impl(PyObject *module, PyObject *pattern)
+/*[clinic end generated code: output=77e0f5de9fbb59bd input=2cc1203a34c571fd]*/
+{
+    // Implementation of fnmatch.translate(pat).
+    //
+    // A str pattern is translated directly. A bytes pattern is decoded as
+    // Latin-1, translated, and the result is encoded back to Latin-1.
+    // Any other type raises a TypeError.
+    if (!PyBytes_Check(pattern)) {
+        if (PyUnicode_Check(pattern)) {
+            return _Py_fnmatch_translate(module, pattern);
+        }
+        PyErr_SetString(PyExc_TypeError, INVALID_PATTERN_TYPE);
+        return NULL;
+    }
+    PyObject *as_text = PyUnicode_DecodeLatin1(PyBytes_AS_STRING(pattern),
+                                               PyBytes_GET_SIZE(pattern),
+                                               "strict");
+    if (as_text == NULL) {
+        return NULL;
+    }
+    PyObject *regex_text = _Py_fnmatch_translate(module, as_text);
+    Py_DECREF(as_text);
+    if (regex_text == NULL) {
+        return NULL;
+    }
+    PyObject *regex_bytes = PyUnicode_AsLatin1String(regex_text);
+    Py_DECREF(regex_text);
+    return regex_bytes;
+}
+
+// ==== Module specs ==========================================================
+
+// fmt: off
+PyDoc_STRVAR(fnmatchmodule_doc,
+"Filename matching with shell patterns.\n"
+"fnmatch(FILENAME, PATTERN) matches according to the local convention.\n"
+"fnmatchcase(FILENAME, PATTERN) always takes case in account.\n\n"
+"The functions operate by translating the pattern into a regular\n"
+"expression. They cache the compiled regular expressions for speed.\n\n"
+"The function translate(PATTERN) returns a regular expression\n"
+"corresponding to PATTERN. (It does not compile it.)");
+// fmt: on
+
+// Functions exported by the module; each *_METHODDEF entry is a macro
+// produced by Argument Clinic in 'clinic/_fnmatchmodule.c.h'.
+static PyMethodDef fnmatchmodule_methods[] = {
+ FNMATCH_FILTER_METHODDEF
+ FNMATCH_FNMATCH_METHODDEF
+ FNMATCH_FNMATCHCASE_METHODDEF
+ FNMATCH_TRANSLATE_METHODDEF
+ {NULL, NULL} // sentinel
+};
+
+// Multi-phase initialization slots: fnmatchmodule_exec() fills the module
+// state, and the two remaining slots declare the module's support for
+// per-interpreter GIL and for running without the GIL.
+static struct PyModuleDef_Slot fnmatchmodule_slots[] = {
+ {Py_mod_exec, fnmatchmodule_exec},
+ {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+ {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+ {0, NULL}, // sentinel
+};
+
+// Module definition: the per-module state is a 'fnmatchmodule_state' whose
+// references are managed by the traverse/clear/free hooks below.
+static struct PyModuleDef _fnmatchmodule = {
+ PyModuleDef_HEAD_INIT,
+ .m_name = "_fnmatch",
+ .m_doc = fnmatchmodule_doc,
+ .m_size = sizeof(fnmatchmodule_state),
+ .m_methods = fnmatchmodule_methods,
+ .m_slots = fnmatchmodule_slots,
+ .m_traverse = fnmatchmodule_traverse,
+ .m_clear = fnmatchmodule_clear,
+ .m_free = fnmatchmodule_free,
+};
+
+// Entry point called by the import machinery for the '_fnmatch' module.
+PyMODINIT_FUNC
+PyInit__fnmatch(void)
+{
+ return PyModuleDef_Init(&_fnmatchmodule);
+}
+
+#undef INVALID_PATTERN_TYPE
+#undef COMPILED_CACHE_SIZE
diff --git a/Modules/_fnmatch/clinic/_fnmatchmodule.c.h b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
new file mode 100644
index 00000000000000..c611f01673b326
--- /dev/null
+++ b/Modules/_fnmatch/clinic/_fnmatchmodule.c.h
@@ -0,0 +1,264 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(fnmatch_filter__doc__,
+"filter($module, /, names, pat)\n"
+"--\n"
+"\n"
+"Construct a list from the names in *names* matching *pat*.");
+
+#define FNMATCH_FILTER_METHODDEF \
+ {"filter", _PyCFunction_CAST(fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, fnmatch_filter__doc__},
+
+static PyObject *
+fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pattern);
+
+static PyObject *
+fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(names), &_Py_ID(pat), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"names", "pat", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "filter",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject *names;
+ PyObject *pattern;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ names = args[0];
+ pattern = args[1];
+ return_value = fnmatch_filter_impl(module, names, pattern);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(fnmatch_fnmatch__doc__,
+"fnmatch($module, /, name, pat)\n"
+"--\n"
+"\n"
+"Test whether *name* matches *pat*.\n"
+"\n"
+"Patterns are Unix shell style:\n"
+"\n"
+"* matches everything\n"
+"? matches any single character\n"
+"[seq] matches any character in seq\n"
+"[!seq] matches any char not in seq\n"
+"\n"
+"An initial period in *name* is not special.\n"
+"Both *name* and *pat* are first case-normalized\n"
+"if the operating system requires it.\n"
+"\n"
+"If you don\'t want this, use fnmatchcase(name, pat).");
+
+#define FNMATCH_FNMATCH_METHODDEF \
+ {"fnmatch", _PyCFunction_CAST(fnmatch_fnmatch), METH_FASTCALL|METH_KEYWORDS, fnmatch_fnmatch__doc__},
+
+static int
+fnmatch_fnmatch_impl(PyObject *module, PyObject *name, PyObject *pattern);
+
+static PyObject *
+fnmatch_fnmatch(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"name", "pat", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "fnmatch",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject *name;
+ PyObject *pattern;
+ int _return_value;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ name = args[0];
+ pattern = args[1];
+ _return_value = fnmatch_fnmatch_impl(module, name, pattern);
+ if ((_return_value == -1) && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(fnmatch_fnmatchcase__doc__,
+"fnmatchcase($module, /, name, pat)\n"
+"--\n"
+"\n"
+"Test whether *name* matches *pat*, including case.\n"
+"\n"
+"This is a version of fnmatch() which doesn\'t case-normalize\n"
+"its arguments.");
+
+#define FNMATCH_FNMATCHCASE_METHODDEF \
+ {"fnmatchcase", _PyCFunction_CAST(fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, fnmatch_fnmatchcase__doc__},
+
+static int
+fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pattern);
+
+static PyObject *
+fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"name", "pat", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "fnmatchcase",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject *name;
+ PyObject *pattern;
+ int _return_value;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ name = args[0];
+ pattern = args[1];
+ _return_value = fnmatch_fnmatchcase_impl(module, name, pattern);
+ if ((_return_value == -1) && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(fnmatch_translate__doc__,
+"translate($module, /, pat)\n"
+"--\n"
+"\n"
+"Translate a shell pattern *pat* to a regular expression.\n"
+"\n"
+"There is no way to quote meta-characters.");
+
+#define FNMATCH_TRANSLATE_METHODDEF \
+ {"translate", _PyCFunction_CAST(fnmatch_translate), METH_FASTCALL|METH_KEYWORDS, fnmatch_translate__doc__},
+
+static PyObject *
+fnmatch_translate_impl(PyObject *module, PyObject *pattern);
+
+static PyObject *
+fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(pat), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"pat", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "translate",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ PyObject *pattern;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ pattern = args[0];
+ return_value = fnmatch_translate_impl(module, pattern);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=50f858ef4bfb569a input=a9049054013a1b77]*/
diff --git a/Modules/_fnmatch/filter.c b/Modules/_fnmatch/filter.c
new file mode 100644
index 00000000000000..bd1d6c8ec85073
--- /dev/null
+++ b/Modules/_fnmatch/filter.c
@@ -0,0 +1,48 @@
+/*
+ * Provide the implementation of the high-level matcher-based functions.
+ */
+
+#include "Python.h"
+
+/*
+ * Return a new list with the items of the iterable 'names' for which
+ * 'matcher(normcase(item))' is not None. The items are stored as given,
+ * i.e., NOT in their normalized form.
+ *
+ * Return NULL if iterating, normalizing, matching or appending fails.
+ */
+PyObject *
+_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normcase)
+{
+    assert(normcase != NULL);
+    PyObject *iterator = PyObject_GetIter(names);
+    if (iterator == NULL) {
+        return NULL;
+    }
+    PyObject *matches = PyList_New(0);
+    if (matches == NULL) {
+        Py_DECREF(iterator);
+        return NULL;
+    }
+    for (;;) {
+        PyObject *item = PyIter_Next(iterator);
+        if (item == NULL) {
+            break;  // exhausted, or an error occurred (checked below)
+        }
+        int failed = 1;
+        PyObject *key = PyObject_CallOneArg(normcase, item);
+        if (key != NULL) {
+            PyObject *outcome = PyObject_CallOneArg(matcher, key);
+            Py_DECREF(key);
+            if (outcome != NULL) {
+                const int keep = !Py_IsNone(outcome);
+                Py_DECREF(outcome);
+                failed = keep && PyList_Append(matches, item) < 0;
+            }
+        }
+        Py_DECREF(item);
+        if (failed) {
+            Py_DECREF(iterator);
+            Py_DECREF(matches);
+            return NULL;
+        }
+    }
+    Py_DECREF(iterator);
+    // PyIter_Next() returns NULL both on exhaustion and on error
+    if (PyErr_Occurred()) {
+        Py_CLEAR(matches);
+    }
+    return matches;
+}
diff --git a/Modules/_fnmatch/macros.h b/Modules/_fnmatch/macros.h
new file mode 100644
index 00000000000000..a39586338ea62a
--- /dev/null
+++ b/Modules/_fnmatch/macros.h
@@ -0,0 +1,132 @@
+/*
+ * This file contains various macro definitions in order to reduce the
+ * number of lines in '_fnmatch'. Do not use them for something else.
+ */
+
+#ifndef _FNMATCH_MACROS_H
+#define _FNMATCH_MACROS_H
+
+// ==== Macro definitions =====================================================
+
+/*
+ * Check that STATUS is >= 0 or execute 'goto abort'.
+ *
+ * This macro is provided for convenience and should be
+ * carefully used if more resources should be released
+ * before jumping to the 'abort' label.
+ */
+#define CHECK_RET_CODE_OR_ABORT(STATUS) \
+ do { \
+ if ((STATUS) < 0) { \
+ assert(PyErr_Occurred()); \
+ goto abort; \
+ } \
+ } while (0)
+
+/*
+ * Identical to CHECK_RET_CODE_OR_ABORT but where the
+ * argument is semantically used as a positive integer.
+ */
+#define CHECK_UNSIGNED_INT_OR_ABORT CHECK_RET_CODE_OR_ABORT
+
+/*
+ * Check that OBJ is not NULL or execute 'goto abort'.
+ *
+ * This macro is provided for convenience and should be
+ * carefully used if more resources should be released
+ * before jumping to the 'abort' label.
+ */
+#define CHECK_NOT_NULL_OR_ABORT(OBJ) \
+ do { \
+ if ((OBJ) == NULL) { \
+ goto abort; \
+ } \
+ } while (0)
+
+// The following _WRITE_* and _WRITE_*_OR macros do NOT check their inputs
+// since they directly delegate to the _PyUnicodeWriter_Write* underlying
+// function. In particular, the caller is responsible for type safety.
+
+/* Cast WRITER and call _PyUnicodeWriter_WriteChar(). */
+#define _WRITE_CHAR(WRITER, CHAR) \
+ _PyUnicodeWriter_WriteChar((_PyUnicodeWriter *)(WRITER), (CHAR))
+
+/* Cast WRITER and call _PyUnicodeWriter_WriteASCIIString(). */
+#define _WRITE_ASCII(WRITER, STRING, LENGTH) \
+ _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter *)(WRITER), \
+ (STRING), (LENGTH))
+
+/* Cast WRITER and call _PyUnicodeWriter_WriteStr(). */
+#define _WRITE_STRING(WRITER, STRING) \
+ _PyUnicodeWriter_WriteStr((_PyUnicodeWriter *)(WRITER), (STRING))
+
+/* Cast WRITER and call _PyUnicodeWriter_WriteSubstring(). */
+#define _WRITE_SUBSTRING(WRITER, STRING, START, STOP) \
+ _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter *)(WRITER), \
+ (STRING), (START), (STOP))
+
+// ----------------------------------------------------------------------------
+
+/* Write the character CHAR or jump to the 'abort' label on failure. */
+#define WRITE_CHAR_OR_ABORT(WRITER, CHAR) \
+ CHECK_RET_CODE_OR_ABORT(_WRITE_CHAR((WRITER), (CHAR)))
+
+/*
+ * Write the ASCII string ASCII of given length LENGTH,
+ * or jump to the 'abort' label on failure.
+ */
+#define WRITE_ASCII_OR_ABORT(WRITER, ASCII, LENGTH) \
+ CHECK_RET_CODE_OR_ABORT(_WRITE_ASCII((WRITER), (ASCII), (LENGTH)))
+
+/* Write the string STRING or jump to the 'abort' label on failure. */
+#define WRITE_STRING_OR_ABORT(WRITER, STRING) \
+ CHECK_RET_CODE_OR_ABORT(_WRITE_STRING((WRITER), (STRING)))
+
+/*
+ * Write the substring STRING[START:STOP] (no-op if empty)
+ * or jump to the 'abort' label on failure.
+ */
+#define WRITE_SUBSTRING_OR_ABORT(WRITER, STRING, START, STOP) \
+ do { \
+ const Py_ssize_t _START = (START); \
+ const Py_ssize_t _STOP = (STOP); \
+ int _RC = _WRITE_SUBSTRING((WRITER), (STRING), _START, _STOP); \
+ CHECK_RET_CODE_OR_ABORT(_RC); \
+ } while (0)
+
+// ----------------------------------------------------------------------------
+
+/* Replace backslashes in STRING by escaped backslashes. */
+#define BACKSLASH_REPLACE(STATE, STRING) \
+ PyObject_CallMethodObjArgs( \
+ (STRING), \
+ &_Py_ID(replace), \
+ (STATE)->backslash_str, \
+ (STATE)->backslash_esc_str, \
+ NULL \
+ )
+
+/* Replace hyphens in STRING by escaped hyphens. */
+#define HYPHEN_REPLACE(STATE, STRING) \
+ PyObject_CallMethodObjArgs( \
+ (STRING), \
+ &_Py_ID(replace), \
+ (STATE)->hyphen_str, \
+ (STATE)->hyphen_esc_str, \
+ NULL \
+ )
+
+/*
+ * Escape set operations in STRING using re.sub().
+ *
+ * SETOPS_RE_SUB_METH is a reference to re.compile('([&~|])').sub().
+ */
+#define SETOPS_REPLACE(STATE, STRING, SETOPS_RE_SUB_METH) \
+ PyObject_CallFunctionObjArgs( \
+ (SETOPS_RE_SUB_METH), \
+ (STATE)->setops_repl_str, \
+ (STRING), \
+ NULL \
+ )
+
+#endif // _FNMATCH_MACROS_H
diff --git a/Modules/_fnmatch/translate.c b/Modules/_fnmatch/translate.c
new file mode 100644
index 00000000000000..3dbd0d59d094d2
--- /dev/null
+++ b/Modules/_fnmatch/translate.c
@@ -0,0 +1,563 @@
+/*
+ * C accelerator for the translation function from UNIX shell patterns
+ * to RE patterns.
+ */
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "macros.h"
+#include "util.h" // for get_fnmatchmodule_state()
+
+#include "pycore_runtime.h" // for _Py_ID()
+
+// ==== Helper declarations ===================================================
+
+/*
+ * Write re.escape(ch).
+ *
+ * This returns the number of written characters, or -1 if an error occurred.
+ */
+static Py_ssize_t
+escape_char(fnmatchmodule_state *state, PyUnicodeWriter *writer, Py_UCS4 ch);
+
+/*
+ * Construct a regular expression out of a UNIX-style expression.
+ *
+ * The expression to translate is the content of an '[(BLOCK)]' expression,
+ * which contains single unicode characters or character ranges (e.g., 'a-z').
+ *
+ * By convention, 'start' and 'stop' represent the INCLUSIVE start index
+ * and EXCLUSIVE stop index of BLOCK in 'pattern'. Stated otherwise:
+ *
+ * pattern[start] == BLOCK[0]
+ * pattern[stop] == ']'
+ *
+ * For instance, for "ab[c-f]g[!1-5]", the values of 'start' and 'stop'
+ * for the sub-pattern '[c-f]' are 3 and 6 respectively, while their
+ * values for '[!1-5]' are 9 and 13 respectively.
+ *
+ * The 'pattern_str_find_meth' argument is a reference to pattern.find().
+ */
+static PyObject *
+translate_expression(fnmatchmodule_state *state,
+ PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+ PyObject *pattern_str_find_meth);
+
+/*
+ * Write the translated pattern obtained by translate_expression().
+ *
+ * This returns the number of written characters, or -1 if an error occurred.
+ */
+static Py_ssize_t
+write_expression(fnmatchmodule_state *state,
+ PyUnicodeWriter *writer, PyObject *expression);
+
+/*
+ * Build the final regular expression by processing the wildcards.
+ *
+ * The position of each wildcard in 'pattern' is given by 'indices'.
+ */
+static PyObject *
+process_wildcards(PyObject *pattern, PyObject *indices);
+
+// ==== API implementation ====================================================
+
+/*
+ * Translate the UNIX shell-style 'pattern' into a regular expression.
+ *
+ * 'pattern' must be a str object (this is only asserted) and 'module' is
+ * the '_fnmatch' module whose state holds the cached helpers.
+ *
+ * The pattern is scanned once and its translation is accumulated in a
+ * writer; the output position of each '*' wildcard is recorded so that
+ * process_wildcards() can build the final expression afterwards.
+ *
+ * Return the object built by process_wildcards(), or NULL on failure.
+ */
+PyObject *
+_Py_fnmatch_translate(PyObject *module, PyObject *pattern)
+{
+ assert(PyUnicode_Check(pattern));
+ fnmatchmodule_state *state = get_fnmatchmodule_state(module);
+ const Py_ssize_t maxind = PyUnicode_GET_LENGTH(pattern);
+
+ // We would write less data if there are successive '*',
+ // which usually happens once or twice in the pattern.
+ // Otherwise, we write >= maxind characters since escaping
+ // them always adds more characters.
+ //
+ // Note that only '()[]{}?*+-|^$\\.&~# \t\n\r\v\f' need to
+ // be escaped when translated to RE patterns and '*' and '?'
+ // are already handled without being escaped.
+ //
+ // In general, UNIX style patterns are more likely to contain
+ // wildcards than characters to be escaped, with the exception
+ // of '-', '\' and '~' (we usually want to match filenames),
+ // and there is a sparse number of them. Therefore, we only
+ // estimate the number of characters to be written to be the
+ // same as the number of characters in the pattern.
+ PyUnicodeWriter *writer = PyUnicodeWriter_Create(maxind);
+ if (writer == NULL) {
+ return NULL;
+ }
+
+ // ---- decl local objects ------------------------------------------------
+ // list containing the indices where '*' has a special meaning
+ PyObject *wildcard_indices = NULL;
+ // call-level cached functions
+ PyObject *pattern_str_find_meth = NULL; // pattern.find()
+ // ---- def local objects -------------------------------------------------
+ wildcard_indices = PyList_New(0);
+ CHECK_NOT_NULL_OR_ABORT(wildcard_indices);
+ pattern_str_find_meth = PyObject_GetAttr(pattern, &_Py_ID(find));
+ CHECK_NOT_NULL_OR_ABORT(pattern_str_find_meth);
+ // ------------------------------------------------------------------------
+ const unsigned int pattern_kind = PyUnicode_KIND(pattern);
+ const void *const pattern_data = PyUnicode_DATA(pattern);
+ // ---- def local macros --------------------------------------------------
+ /* read the character of 'pattern' at index IND (no bounds check) */
+#define READ_CHAR(IND) PyUnicode_READ(pattern_kind, pattern_data, IND)
+ /* advance IND if the character is CHAR */
+#define ADVANCE_IF_CHAR_IS(CHAR, IND, MAXIND) \
+ do { \
+ if ((IND) < (MAXIND) && READ_CHAR(IND) == (CHAR)) { \
+ ++IND; \
+ } \
+ } while (0)
+ // ------------------------------------------------------------------------
+ Py_ssize_t i = 0; // current index
+ Py_ssize_t written = 0; // number of characters written
+ while (i < maxind) {
+ Py_UCS4 chr = READ_CHAR(i++);
+ switch (chr) {
+ case '*': {
+ // write a single '*' for the wildcard; its position in the
+ // output is recorded below and given to process_wildcards()
+ WRITE_CHAR_OR_ABORT(writer, '*');
+ // skip duplicated '*'
+ for (; i < maxind && READ_CHAR(i) == '*'; ++i);
+ // store the position of the wildcard
+ PyObject *wildcard_index = PyLong_FromSsize_t(written++);
+ CHECK_NOT_NULL_OR_ABORT(wildcard_index);
+ int rc = PyList_Append(wildcard_indices, wildcard_index);
+ Py_DECREF(wildcard_index);
+ CHECK_RET_CODE_OR_ABORT(rc);
+ break;
+ }
+ case '?': {
+ // translate '?' (fnmatch: any single character) into '.' (regex)
+ WRITE_CHAR_OR_ABORT(writer, '.');
+ ++written; // increase the expected result's length
+ break;
+ }
+ case '[': {
+ assert(READ_CHAR(i - 1) == '[');
+ // 'i' is the index just after '[' and 'j' looks for the closing ']'
+ Py_ssize_t j = i;
+ ADVANCE_IF_CHAR_IS('!', j, maxind); // [!
+ ADVANCE_IF_CHAR_IS(']', j, maxind); // [!] or []
+ for (; j < maxind && READ_CHAR(j) != ']'; ++j); // locate ']'
+ if (j >= maxind) {
+ // no closing ']': the '[' is a literal character
+ WRITE_ASCII_OR_ABORT(writer, "\\[", 2);
+ written += 2; // we just wrote 2 characters
+ break; // explicit early break for clarity
+ }
+ else {
+ assert(READ_CHAR(j) == ']');
+ // PyUnicode_FindChar() returns -1 if not found and -2 on error
+ Py_ssize_t pos = PyUnicode_FindChar(pattern, '-', i, j, 1);
+ if (pos == -2) {
+ goto abort;
+ }
+ PyObject *expr = NULL;
+ if (pos == -1) {
+ // no hyphen in the block: only backslashes need escaping
+ PyObject *tmp = PyUnicode_Substring(pattern, i, j);
+ CHECK_NOT_NULL_OR_ABORT(tmp);
+ expr = BACKSLASH_REPLACE(state, tmp);
+ Py_DECREF(tmp);
+ }
+ else {
+ expr = translate_expression(state, pattern, i, j,
+ pattern_str_find_meth);
+ }
+ CHECK_NOT_NULL_OR_ABORT(expr);
+ Py_ssize_t expr_len = write_expression(state, writer, expr);
+ Py_DECREF(expr);
+ CHECK_UNSIGNED_INT_OR_ABORT(expr_len);
+ written += expr_len;
+ i = j + 1; // jump to the character after ']'
+ break; // explicit early break for clarity
+ }
+ }
+ default: {
+ // any other character is written as re.escape(chr)
+ Py_ssize_t t = escape_char(state, writer, chr);
+ CHECK_UNSIGNED_INT_OR_ABORT(t);
+ written += t;
+ break;
+ }
+ }
+ }
+#undef ADVANCE_IF_CHAR_IS
+#undef READ_CHAR
+ Py_DECREF(pattern_str_find_meth);
+ // the writer is consumed by PyUnicodeWriter_Finish(), even on failure,
+ // hence the absence of 'goto abort' from this point
+ PyObject *translated = PyUnicodeWriter_Finish(writer);
+ if (translated == NULL) {
+ Py_DECREF(wildcard_indices);
+ return NULL;
+ }
+ PyObject *res = process_wildcards(translated, wildcard_indices);
+ Py_DECREF(translated);
+ Py_DECREF(wildcard_indices);
+ return res;
+abort:
+ Py_XDECREF(pattern_str_find_meth);
+ Py_XDECREF(wildcard_indices);
+ PyUnicodeWriter_Discard(writer);
+ return NULL;
+}
+
+// ==== Helper implementations ================================================
+
+/*
+ * Return a one-character string holding the code point 'ch'
+ * (taken from unicodeobject.c).
+ *
+ * Code points below 256 are served by _Py_LATIN1_CHR(); a new string is
+ * allocated for the other ones, and NULL is returned if this fails.
+ */
+static inline PyObject *
+unicode_char(Py_UCS4 ch)
+{
+#define MAX_UNICODE 0x10ffff
+    assert(ch <= MAX_UNICODE);
+#undef MAX_UNICODE
+    if (ch < 256) {
+        return _Py_LATIN1_CHR(ch);
+    }
+    PyObject *single = PyUnicode_New(1, ch);
+    if (single != NULL) {
+        // 'ch' >= 256 cannot be stored in a 1-byte representation
+        assert(PyUnicode_KIND(single) != PyUnicode_1BYTE_KIND);
+        if (PyUnicode_KIND(single) != PyUnicode_2BYTE_KIND) {
+            assert(PyUnicode_KIND(single) == PyUnicode_4BYTE_KIND);
+            PyUnicode_4BYTE_DATA(single)[0] = ch;
+        }
+        else {
+            PyUnicode_2BYTE_DATA(single)[0] = (Py_UCS2)ch;
+        }
+        assert(_PyUnicode_CheckConsistency(single, 1));
+    }
+    return single;
+}
+
+/*
+ * Write re.escape(ch) to 'writer' using the 're_escape' callable cached
+ * in the module state.
+ *
+ * Return the length of the escaped string, or -1 if an error occurred.
+ */
+static Py_ssize_t
+escape_char(fnmatchmodule_state *state, PyUnicodeWriter *writer, Py_UCS4 ch)
+{
+    PyObject *single = unicode_char(ch);
+    if (single == NULL) {
+        return -1;
+    }
+    PyObject *escaped = PyObject_CallOneArg(state->re_escape, single);
+    Py_DECREF(single);
+    if (escaped == NULL) {
+        return -1;
+    }
+    // fetch the length before releasing the escaped string
+    const Py_ssize_t length = PyUnicode_GET_LENGTH(escaped);
+    const int status = _WRITE_STRING(writer, escaped);
+    Py_DECREF(escaped);
+    if (status < 0) {
+        assert(PyErr_Occurred());
+        return -1;
+    }
+    return length;
+}
+
+/*
+ * Extract a list of chunks from the pattern group described by start and stop.
+ *
+ * The block pattern[start:stop] is split at the hyphens that may denote a
+ * character range. A leading '!' is NOT removed by this function: it is kept
+ * in the first chunk. For instance, the chunks for [a-z0-9] are
+ * ['a', 'z0', '9'] and those for [!a-z0-9] are ['!a', 'z0', '9'].
+ *
+ * If the block ends with a hyphen (e.g., [a-]), that hyphen is appended to
+ * the last chunk instead of creating an empty chunk.
+ *
+ * The 'str_find_func' argument is a reference to pattern.find().
+ *
+ * Return a new list of strings, or NULL if an error occurred.
+ *
+ * See translate_expression() for its usage.
+ */
+static PyObject *
+split_expression(fnmatchmodule_state *state,
+                 PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+                 PyObject *str_find_func)
+{
+    // ---- decl local objects ------------------------------------------------
+    PyObject *chunks = NULL, *maxind = NULL;
+    PyObject *hyphen = state->hyphen_str;
+    // ---- def local objects -------------------------------------------------
+    chunks = PyList_New(0);
+    CHECK_NOT_NULL_OR_ABORT(chunks);
+    maxind = PyLong_FromSsize_t(stop);
+    CHECK_NOT_NULL_OR_ABORT(maxind);
+    // ---- def local macros --------------------------------------------------
+    /* add pattern[START:STOP] to the list of chunks */
+#define ADD_CHUNK(START, STOP)                                              \
+    do {                                                                    \
+        PyObject *chunk = PyUnicode_Substring(pattern, (START), (STOP));    \
+        CHECK_NOT_NULL_OR_ABORT(chunk);                                     \
+        int rc = PyList_Append(chunks, chunk);                              \
+        Py_DECREF(chunk);                                                   \
+        CHECK_RET_CODE_OR_ABORT(rc);                                        \
+    } while (0)
+    // ------------------------------------------------------------------------
+    Py_ssize_t chunk_start = start;
+    bool is_complement = PyUnicode_READ_CHAR(pattern, start) == '!';
+    // The search for a range hyphen starts after the first character of
+    // the block (after the first two if the block starts with '!'), so a
+    // hyphen at that position is never a range separator.
+    Py_ssize_t ind = is_complement ? start + 2 : start + 1;
+    while (ind < stop) {
+        // chunk_stop = pattern.find('-', ind, stop)
+        PyObject *p_chunk_stop = PyObject_CallFunction(str_find_func, "OnO",
+                                                       hyphen, ind, maxind);
+        CHECK_NOT_NULL_OR_ABORT(p_chunk_stop);
+        Py_ssize_t chunk_stop = PyLong_AsSsize_t(p_chunk_stop);
+        Py_DECREF(p_chunk_stop);
+        if (chunk_stop < 0) {
+            if (PyErr_Occurred()) {
+                goto abort;
+            }
+            // -1 here means that '-' was not found
+            assert(chunk_stop == -1);
+            break;
+        }
+        ADD_CHUNK(chunk_start, chunk_stop);
+        chunk_start = chunk_stop + 1;   // jump after '-'
+        ind = chunk_stop + 3;           // ensure a non-empty next chunk
+    }
+    if (chunk_start < stop) {
+        ADD_CHUNK(chunk_start, stop);
+    }
+    else {
+        // the block ends with '-': attach it to the last chunk
+        Py_ssize_t chunkscount = PyList_GET_SIZE(chunks);
+        assert(chunkscount > 0);
+        PyObject *chunk = PyList_GET_ITEM(chunks, chunkscount - 1);
+        assert(chunk != NULL);
+        PyObject *str = PyUnicode_Concat(chunk, hyphen);
+        CHECK_NOT_NULL_OR_ABORT(str);
+        // PyList_SetItem() steals the reference to 'str' whether it
+        // succeeds or fails, so 'str' must NOT be released by us, not
+        // even on the error path (that would be a double decref).
+        CHECK_RET_CODE_OR_ABORT(PyList_SetItem(chunks, chunkscount - 1, str));
+    }
+#undef ADD_CHUNK
+    Py_DECREF(maxind);
+    return chunks;
+abort:
+    Py_XDECREF(maxind);
+    Py_XDECREF(chunks);
+    return NULL;
+}
+
+/*
+ * Remove empty ranges (they are invalid in RE).
+ *
+ * Two consecutive chunks c1 and c2 describe the range 'c1[-1]-c2[0]'.
+ * When c1[-1] > c2[0], the range is empty: both chunks are merged into
+ * 'c1[:-1] + c2[1:]', which is equivalent to the Python code:
+ *
+ *      chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
+ *      del chunks[k]
+ *
+ * The list 'chunks' is modified in place and must only contain
+ * non-empty strings (this is only asserted).
+ *
+ * Return 0 on success, or -1 if an error occurred.
+ *
+ * See translate_expression() for its usage.
+ */
+static int
+simplify_expression(PyObject *chunks)
+{
+    // for k in range(len(chunks) - 1, 0, -1):
+    for (Py_ssize_t k = PyList_GET_SIZE(chunks) - 1; k > 0; --k) {
+        PyObject *c1 = PyList_GET_ITEM(chunks, k - 1);
+        assert(c1 != NULL);
+        Py_ssize_t c1len = PyUnicode_GET_LENGTH(c1);
+        assert(c1len > 0);
+
+        PyObject *c2 = PyList_GET_ITEM(chunks, k);
+        assert(c2 != NULL);
+        Py_ssize_t c2len = PyUnicode_GET_LENGTH(c2);
+        assert(c2len > 0);
+
+        if (PyUnicode_READ_CHAR(c1, c1len - 1) > PyUnicode_READ_CHAR(c2, 0)) {
+            Py_ssize_t olen = c1len + c2len - 2;
+            assert(olen >= 0);
+            PyObject *str = NULL;
+            if (olen == 0) {  // c1[:-1] + c2[1:] == ''
+                str = Py_GetConstant(Py_CONSTANT_EMPTY_STR);
+                assert(_Py_IsImmortal(str));
+            }
+            else if (c1len == 1) {  // c1[:-1] + c2[1:] == c2[1:]
+                assert(c2len > 1);
+                str = PyUnicode_Substring(c2, 1, c2len);
+            }
+            else if (c2len == 1) {  // c1[:-1] + c2[1:] == c1[:-1]
+                assert(c1len > 1);
+                str = PyUnicode_Substring(c1, 0, c1len - 1);
+            }
+            else {
+                assert(c1len > 1);
+                assert(c2len > 1);
+                PyUnicodeWriter *writer = PyUnicodeWriter_Create(olen);
+                CHECK_NOT_NULL_OR_ABORT(writer);
+                // all but the last character in the first chunk
+                if (_WRITE_SUBSTRING(writer, c1, 0, c1len - 1) < 0) {
+                    PyUnicodeWriter_Discard(writer);
+                    goto abort;
+                }
+                // all but the first character in the second chunk
+                if (_WRITE_SUBSTRING(writer, c2, 1, c2len) < 0) {
+                    PyUnicodeWriter_Discard(writer);
+                    goto abort;
+                }
+                str = PyUnicodeWriter_Finish(writer);
+            }
+            CHECK_NOT_NULL_OR_ABORT(str);
+            // PyList_SetItem() steals the reference to 'str' whether it
+            // succeeds or fails, so 'str' must NOT be released by us, not
+            // even on the error path (that would be a double decref).
+            // It also releases 'c1', which is not used afterwards.
+            CHECK_RET_CODE_OR_ABORT(PyList_SetItem(chunks, k - 1, str));
+            CHECK_RET_CODE_OR_ABORT(PySequence_DelItem(chunks, k));
+        }
+    }
+    return 0;
+abort:
+    return -1;
+}
+
+/*
+ * Escape backslashes and hyphens for set difference (--),
+ * but hyphens that create ranges should not be escaped.
+ * Every chunk is replaced in place; return 0 on success, -1 on failure.
+ * See translate_expression() for its usage.
+ */
+static int
+escape_expression(fnmatchmodule_state *state, PyObject *chunks)
+{
+    for (Py_ssize_t c = 0; c < PyList_GET_SIZE(chunks); ++c) {
+        PyObject *s0 = PyList_GET_ITEM(chunks, c);  // borrowed reference
+        assert(s0 != NULL);
+        PyObject *s1 = BACKSLASH_REPLACE(state, s0);
+        CHECK_NOT_NULL_OR_ABORT(s1);
+        PyObject *s2 = HYPHEN_REPLACE(state, s1);
+        Py_DECREF(s1);
+        // PyList_SetItem() steals the reference to 's2' and releases it
+        // itself when it fails, so 's2' must not be decref'ed here
+        // ('s2' is NULL if the replacement above failed).
+        if (s2 == NULL || PyList_SetItem(chunks, c, s2) < 0) {
+            // nothing to release: 's2' is NULL or already consumed
+            goto abort;
+        }
+    }
+    return 0;
+abort:
+    return -1;
+}
+
+static PyObject *
+translate_expression(fnmatchmodule_state *state,
+                     PyObject *pattern, Py_ssize_t start, Py_ssize_t stop,
+                     PyObject *pattern_str_find_meth)
+{
+    // Pipeline: split into chunks, drop empty ranges, escape, re-join by '-'.
+    PyObject *joined = NULL;  // stays NULL on every failure path
+    PyObject *parts = split_expression(state, pattern, start, stop,
+                                       pattern_str_find_meth);
+    CHECK_NOT_NULL_OR_ABORT(parts);
+    // get rid of the empty ranges first...
+    CHECK_RET_CODE_OR_ABORT(simplify_expression(parts));
+    // ...then escape the backslashes and the set differences
+    CHECK_RET_CODE_OR_ABORT(escape_expression(state, parts));
+    joined = PyUnicode_Join(state->hyphen_str, parts);  // NULL on failure
+abort:  // single exit point, also reached on success
+    Py_XDECREF(parts);
+    return joined;
+}
+
+static Py_ssize_t
+write_expression(fnmatchmodule_state *state,
+                 PyUnicodeWriter *writer, PyObject *expression)
+{
+    // Write the RE form of the bracket expression 'expression' to 'writer'
+    // and return the number of characters written, or -1 on failure.
+    PyObject *safe_expression = NULL; // for the 'goto abort' statements
+    Py_ssize_t grouplen = PyUnicode_GET_LENGTH(expression);
+    if (grouplen == 0) {
+        // empty range: never match
+        WRITE_ASCII_OR_ABORT(writer, "(?!)", 4);
+        return 4;
+    }
+    Py_UCS4 token = PyUnicode_READ_CHAR(expression, 0);
+    if (grouplen == 1 && token == '!') {
+        // negated empty range: match any character
+        WRITE_CHAR_OR_ABORT(writer, '.');
+        return 1;
+    }
+    Py_ssize_t extra = 2; // '[' and ']'
+    WRITE_CHAR_OR_ABORT(writer, '[');
+    // escape set operations as late as possible
+    safe_expression = SETOPS_REPLACE(state, expression, state->setops_re_subfn);
+    CHECK_NOT_NULL_OR_ABORT(safe_expression);
+    // Escaping may insert backslashes: measure the escaped string itself
+    // instead of reusing 'grouplen', or its tail would be truncated.
+    const Py_ssize_t safelen = PyUnicode_GET_LENGTH(safe_expression);
+    if (token == '!') {
+        WRITE_CHAR_OR_ABORT(writer, '^'); // replace '!' by '^'
+        WRITE_SUBSTRING_OR_ABORT(writer, safe_expression, 1, safelen);
+    }
+    else {
+        if (token == '^' || token == '[') {
+            WRITE_CHAR_OR_ABORT(writer, '\\');
+            ++extra; // because we wrote '\\'
+        }
+        WRITE_STRING_OR_ABORT(writer, safe_expression);
+    }
+    WRITE_CHAR_OR_ABORT(writer, ']');
+    // release the escaped string only once no 'goto abort' can follow,
+    // otherwise the abort path would decref it a second time
+    Py_DECREF(safe_expression);
+    return safelen + extra;
+abort:
+    Py_XDECREF(safe_expression);
+    return -1;
+}
+
+static PyObject *
+process_wildcards(PyObject *pattern, PyObject *indices)
+{
+    const Py_ssize_t patlen = PyUnicode_GET_LENGTH(pattern);
+    const Py_ssize_t nstars = PyList_GET_SIZE(indices);
+    // Let nstars = len(indices) and patlen = len(pattern), where 'indices'
+    // lists the positions of the wildcards in 'pattern'. By construction,
+    //
+    //      pattern = [PREFIX] [[(* INNER) ... (* INNER)] (*) [OUTER]]
+    //
+    // where [...] denotes an optional group and (...) a required one.
+    //
+    // The translation goes as follows:
+    //
+    //  - "(?s:" is written first.
+    //  - The optional PREFIX is copied as is.
+    //  - Each (* INNER) group becomes "(?>.*?" + INNER + ")".
+    //  - The last wildcard becomes ".*".
+    //  - The optional OUTER string is copied as is.
+    //  - ")\\Z" is written last.
+    //
+    // If nstars = 0, the writer needs patlen + 7 characters. Otherwise, it
+    // needs patlen + 6(nstars-1) + 1 + 7 = patlen + 6*nstars + 2 characters:
+    // an inner group adds 6 characters and ".*" adds 1 (it replaces "*").
+    const Py_ssize_t reslen = patlen + (nstars == 0 ? 7 : 6 * nstars + 2);
+    PyUnicodeWriter *writer = PyUnicodeWriter_Create(reslen);
+    if (writer == NULL) {
+        return NULL;
+    }
+    // ---- def local macros --------------------------------------------------
+#define LOAD_WILDCARD_INDEX(VAR, IND)                               \
+    do {                                                            \
+        VAR = PyLong_AsSsize_t(PyList_GET_ITEM(indices, (IND)));    \
+        /* wildcard indices must be >= 0 */                         \
+        CHECK_UNSIGNED_INT_OR_ABORT(VAR);                           \
+    } while (0)
+    // ------------------------------------------------------------------------
+    WRITE_ASCII_OR_ABORT(writer, "(?s:", 4);
+    if (nstars > 0) {
+        Py_ssize_t begin = 0, star = -1;
+        // optional PREFIX: everything located before the first wildcard
+        LOAD_WILDCARD_INDEX(star, 0);
+        WRITE_SUBSTRING_OR_ABORT(writer, pattern, begin, star);
+        begin = star + 1;
+        for (Py_ssize_t w = 1; w < nstars; ++w) {
+            // INNER is the text between the previous wildcard and this one
+            LOAD_WILDCARD_INDEX(star, w);
+            assert(begin < star);
+            // emit it as the atomic RE group '(?>.*?' + INNER + ')'
+            WRITE_ASCII_OR_ABORT(writer, "(?>.*?", 6);
+            WRITE_SUBSTRING_OR_ABORT(writer, pattern, begin, star);
+            WRITE_CHAR_OR_ABORT(writer, ')');
+            begin = star + 1;
+        }
+        // the last wildcard (*) followed by the optional OUTER part
+        WRITE_ASCII_OR_ABORT(writer, ".*", 2);
+        WRITE_SUBSTRING_OR_ABORT(writer, pattern, begin, patlen);
+    }
+    else {
+        WRITE_STRING_OR_ABORT(writer, pattern);
+    }
+    WRITE_ASCII_OR_ABORT(writer, ")\\Z", 3);
+    PyObject *out = PyUnicodeWriter_Finish(writer);
+    assert(out == NULL || PyUnicode_GET_LENGTH(out) == reslen);
+    return out;
+abort:
+    PyUnicodeWriter_Discard(writer);
+    return NULL;
+#undef LOAD_WILDCARD_INDEX
+}
diff --git a/Modules/_fnmatch/util.h b/Modules/_fnmatch/util.h
new file mode 100644
index 00000000000000..276921328dd868
--- /dev/null
+++ b/Modules/_fnmatch/util.h
@@ -0,0 +1,70 @@
+/*
+ * This file contains helper prototypes and structures.
+ */
+
+#ifndef _FNMATCH_UTIL_H
+#define _FNMATCH_UTIL_H
+
+#include "Python.h"
+
+typedef struct { // module state, retrieved via get_fnmatchmodule_state()
+ PyObject *os_module; // import os
+ PyObject *re_module; // import re
+
+ PyObject *translator; // LRU-cached translation unit
+ PyObject *re_escape; // LRU-cached re.escape() function
+
+ // strings used by translate.c
+ PyObject *hyphen_str; // hyphen '-' (joins the chunks in translate_expression())
+ PyObject *hyphen_esc_str; // escaped hyphen '\\-'
+
+ PyObject *backslash_str; // backslash '\\'
+ PyObject *backslash_esc_str; // escaped backslash '\\\\'
+
+ /* set operation tokens (&&, ~~ and ||) are not supported in regex */
+ PyObject *setops_re_subfn; // cached re.compile('([&~|])').sub(), used by write_expression()
+ PyObject *setops_repl_str; // replacement pattern '\\\\\\1'
+} fnmatchmodule_state;
+
+static inline fnmatchmodule_state *
+get_fnmatchmodule_state(PyObject *module)
+{
+    fnmatchmodule_state *st = (fnmatchmodule_state *)PyModule_GetState(module);
+    assert(st != NULL);  // a missing state is treated as a programming error
+    return st;
+}
+
+// ==== Helper prototypes =====================================================
+
+/*
+ * Returns a list of matched names, or NULL if an error occurred.
+ *
+ * Parameters
+ *
+ * matcher A reference to the 'match()' method of a compiled pattern.
+ * names An iterable of strings (str or bytes objects) to match.
+ * normcase A reference to os.path.normcase().
+ *
+ * This is equivalent to:
+ *
+ * [name for name in names if matcher(normcase(name))]
+ */
+extern PyObject *
+_Py_fnmatch_filter(PyObject *matcher, PyObject *names, PyObject *normcase);
+
+/*
+ * C accelerator for translating UNIX shell patterns into RE patterns.
+ *
+ * Parameters
+ *
+ * module A module with a state given by get_fnmatchmodule_state().
+ * pattern A Unicode object to translate.
+ *
+ * Returns
+ *
+ * A translated unicode RE pattern (presumably NULL on error; TODO confirm).
+ */
+extern PyObject *
+_Py_fnmatch_translate(PyObject *module, PyObject *pattern);
+
+#endif // _FNMATCH_UTIL_H
diff --git a/Modules/clinic/_fnmatchmodule.c.h b/Modules/clinic/_fnmatchmodule.c.h
new file mode 100644
index 00000000000000..4b12f33113d3fb
--- /dev/null
+++ b/Modules/clinic/_fnmatchmodule.c.h
@@ -0,0 +1,185 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_fnmatch_filter__doc__,
+"filter($module, /, names, pat)\n"
+"--\n"
+"\n");
+
+#define _FNMATCH_FILTER_METHODDEF \
+ {"filter", _PyCFunction_CAST(_fnmatch_filter), METH_FASTCALL|METH_KEYWORDS, _fnmatch_filter__doc__},
+
+static PyObject *
+_fnmatch_filter_impl(PyObject *module, PyObject *names, PyObject *pat);
+
+static PyObject *
+_fnmatch_filter(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(names), &_Py_ID(pat), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"names", "pat", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "filter",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject *names;
+ PyObject *pat;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ names = args[0];
+ pat = args[1];
+ return_value = _fnmatch_filter_impl(module, names, pat);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_fnmatch_fnmatchcase__doc__,
+"fnmatchcase($module, /, name, pat)\n"
+"--\n"
+"\n"
+"Test whether `name` matches `pattern`, including case.\n"
+"\n"
+"This is a version of fnmatch() which doesn\'t case-normalize\n"
+"its arguments.");
+
+#define _FNMATCH_FNMATCHCASE_METHODDEF \
+ {"fnmatchcase", _PyCFunction_CAST(_fnmatch_fnmatchcase), METH_FASTCALL|METH_KEYWORDS, _fnmatch_fnmatchcase__doc__},
+
+static int
+_fnmatch_fnmatchcase_impl(PyObject *module, PyObject *name, PyObject *pat);
+
+static PyObject *
+_fnmatch_fnmatchcase(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(name), &_Py_ID(pat), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"name", "pat", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "fnmatchcase",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject *name;
+ PyObject *pat;
+ int _return_value;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ name = args[0];
+ pat = args[1];
+ _return_value = _fnmatch_fnmatchcase_impl(module, name, pat);
+ if ((_return_value == -1) && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_fnmatch_translate__doc__,
+"translate($module, /, pat)\n"
+"--\n"
+"\n");
+
+#define _FNMATCH_TRANSLATE_METHODDEF \
+ {"translate", _PyCFunction_CAST(_fnmatch_translate), METH_FASTCALL|METH_KEYWORDS, _fnmatch_translate__doc__},
+
+static PyObject *
+_fnmatch_translate_impl(PyObject *module, PyObject *pattern);
+
+static PyObject *
+_fnmatch_translate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(pat), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"pat", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "translate",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ PyObject *pattern;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ pattern = args[0];
+ return_value = _fnmatch_translate_impl(module, pattern);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=b0366b259b101bdf input=a9049054013a1b77]*/
diff --git a/PC/config.c b/PC/config.c
index b744f711b0d636..7c7c2540118cf2 100644
--- a/PC/config.c
+++ b/PC/config.c
@@ -10,6 +10,7 @@ extern PyObject* PyInit_array(void);
extern PyObject* PyInit_binascii(void);
extern PyObject* PyInit_cmath(void);
extern PyObject* PyInit_errno(void);
+extern PyObject* PyInit__fnmatch(void);
extern PyObject* PyInit_faulthandler(void);
extern PyObject* PyInit__tracemalloc(void);
extern PyObject* PyInit_gc(void);
@@ -91,6 +92,7 @@ struct _inittab _PyImport_Inittab[] = {
{"binascii", PyInit_binascii},
{"cmath", PyInit_cmath},
{"errno", PyInit_errno},
+ {"_fnmatch", PyInit__fnmatch},
{"faulthandler", PyInit_faulthandler},
{"gc", PyInit_gc},
{"math", PyInit_math},
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 9e3af689f4a288..2083072f6cf8cf 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -369,6 +369,8 @@
+
+
@@ -474,6 +476,9 @@
+
+
+
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 31f7971bda845d..301030d50b5733 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -252,6 +252,12 @@
Modules
+
+ Modules\_fnmatch
+
+
+ Modules\_fnmatch
+
Modules\_io
@@ -1061,6 +1067,15 @@
Modules
+
+ Modules\_fnmatch
+
+
+ Modules\_fnmatch
+
+
+ Modules\_fnmatch
+
Modules\_io
diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h
index 4d595d98445a05..269c7b34e9d477 100644
--- a/Python/stdlib_module_names.h
+++ b/Python/stdlib_module_names.h
@@ -32,6 +32,7 @@ static const char* _Py_stdlib_module_names[] = {
"_dbm",
"_decimal",
"_elementtree",
+"_fnmatch",
"_frozen_importlib",
"_frozen_importlib_external",
"_functools",
diff --git a/configure b/configure
index 52988f77f6d926..60ebd04f3735d7 100755
--- a/configure
+++ b/configure
@@ -801,6 +801,8 @@ MODULE__JSON_FALSE
MODULE__JSON_TRUE
MODULE__HEAPQ_FALSE
MODULE__HEAPQ_TRUE
+MODULE__FNMATCH_FALSE
+MODULE__FNMATCH_TRUE
MODULE__CSV_FALSE
MODULE__CSV_TRUE
MODULE__CONTEXTVARS_FALSE
@@ -27749,6 +27751,7 @@ SRCDIRS="\
Modules/_ctypes \
Modules/_decimal \
Modules/_decimal/libmpdec \
+ Modules/_fnmatch \
Modules/_hacl \
Modules/_io \
Modules/_multiprocessing \
@@ -29233,6 +29236,28 @@ then :
+fi
+
+
+ if test "$py_cv_module__fnmatch" != "n/a"
+then :
+ py_cv_module__fnmatch=yes
+fi
+ if test "$py_cv_module__fnmatch" = yes; then
+ MODULE__FNMATCH_TRUE=
+ MODULE__FNMATCH_FALSE='#'
+else
+ MODULE__FNMATCH_TRUE='#'
+ MODULE__FNMATCH_FALSE=
+fi
+
+ as_fn_append MODULE_BLOCK "MODULE__FNMATCH_STATE=$py_cv_module__fnmatch$as_nl"
+ if test "x$py_cv_module__fnmatch" = xyes
+then :
+
+ as_fn_append MODULE_BLOCK "MODULE__FNMATCH_CFLAGS=-I\$(srcdir)/Modules/_fnmatch$as_nl"
+
+
fi
@@ -31818,6 +31843,10 @@ if test -z "${MODULE__CSV_TRUE}" && test -z "${MODULE__CSV_FALSE}"; then
as_fn_error $? "conditional \"MODULE__CSV\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${MODULE__FNMATCH_TRUE}" && test -z "${MODULE__FNMATCH_FALSE}"; then
+ as_fn_error $? "conditional \"MODULE__FNMATCH\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${MODULE__HEAPQ_TRUE}" && test -z "${MODULE__HEAPQ_FALSE}"; then
as_fn_error $? "conditional \"MODULE__HEAPQ\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
diff --git a/configure.ac b/configure.ac
index 5bde6803cd5a7b..2183ae2a590daa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -7009,6 +7009,7 @@ SRCDIRS="\
Modules/_ctypes \
Modules/_decimal \
Modules/_decimal/libmpdec \
+ Modules/_fnmatch \
Modules/_hacl \
Modules/_io \
Modules/_multiprocessing \
@@ -7694,6 +7695,7 @@ PY_STDLIB_MOD_SIMPLE([_asyncio])
PY_STDLIB_MOD_SIMPLE([_bisect])
PY_STDLIB_MOD_SIMPLE([_contextvars])
PY_STDLIB_MOD_SIMPLE([_csv])
+PY_STDLIB_MOD_SIMPLE([_fnmatch], [-I\$(srcdir)/Modules/_fnmatch], [])
PY_STDLIB_MOD_SIMPLE([_heapq])
PY_STDLIB_MOD_SIMPLE([_json])
PY_STDLIB_MOD_SIMPLE([_lsprof])