Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 36 additions & 19 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def compile(pattern, flags=0):
def purge():
"Clear the regular expression caches"
_cache.clear()
_cache2.clear()
_compile_repl.cache_clear()

def template(pattern, flags=0):
Expand Down Expand Up @@ -267,39 +268,55 @@ def escape(pattern):
# internals

_cache = {} # ordered!
_cache2 = {} # ordered!

_MAXCACHE = 512
_MAXCACHE2 = 256
def _compile(pattern, flags):
# internal: compile pattern
if isinstance(flags, RegexFlag):
flags = flags.value
try:
return _cache[type(pattern), pattern, flags]
return _cache2[type(pattern), pattern, flags]
except KeyError:
pass
if isinstance(pattern, Pattern):
if flags:
raise ValueError(
"cannot process flags argument with a compiled pattern")
return pattern
if not _compiler.isstring(pattern):
raise TypeError("first argument must be string or compiled pattern")
if flags & T:
import warnings
warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
"as it is an undocumented flag "
"without an obvious purpose. "
"Don't use it.",
DeprecationWarning)
p = _compiler.compile(pattern, flags)
if not (flags & DEBUG):

key = (type(pattern), pattern, flags)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't this be defined before the try/except above?

I also wonder if an if key in _cache2: return _cache2[key] would be more efficient than the try/except.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All this would add a small but measurable overhead in the common case. I tested this when I wrote the current implementation.

p = _cache.pop(key, None)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does it remove it from the _cache?
I think it would be better to add a comment to elaborate a bit.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we need to move it to the end.

if p is None:
if isinstance(pattern, Pattern):
if flags:
raise ValueError(
"cannot process flags argument with a compiled pattern")
return pattern
if not _compiler.isstring(pattern):
raise TypeError("first argument must be string or compiled pattern")
if flags & T:
import warnings
warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
"as it is an undocumented flag "
"without an obvious purpose. "
"Don't use it.",
DeprecationWarning)
p = _compiler.compile(pattern, flags)
if flags & DEBUG:
return p
if len(_cache) >= _MAXCACHE:
# Drop the oldest item
# Drop the least used item
try:
del _cache[next(iter(_cache))]
except (StopIteration, RuntimeError, KeyError):
pass
_cache[type(pattern), pattern, flags] = p
# Append to the end
_cache[key] = p

if len(_cache2) >= _MAXCACHE2:
# Drop the oldest item
try:
del _cache2[next(iter(_cache2))]
except (StopIteration, RuntimeError, KeyError):
pass
_cache2[key] = p
return p

@functools.lru_cache(_MAXCACHE)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Use double caching for compiled RE patterns.