> # Ennnnnnnnnnnnnnncoding: UTF^-_-^-_-^8 also works :)
>
> — Armin Ronacher (@mitsuhiko) July 20, 2018
Python's tokenizer finds PEP 263 coding cookies with a rather permissive regex:

>>> from tokenize import cookie_re
>>> print(cookie_re.pattern)
^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)
>>> cookie_re.search('# Ennnncoding: utf-8').group(1)
'utf-8'
>>> cookie_re.search('# Ennnncoding: UTF^-_-^-_-^8').group(1)
'UTF'
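The tokenizer really does honor these mangled comments when it sniffs a file's encoding. A small sketch of my own using the public tokenize.detect_encoding API (a latin-1 cookie makes the effect visible, since utf-8 is the default anyway):

>>> import io, tokenize
>>> source = b'# Ennnncoding: latin-1\nx = 1\n'
>>> tokenize.detect_encoding(io.BytesIO(source).readline)[0]
'iso-8859-1'

(detect_encoding also normalizes latin-1 to its canonical iso-8859-1 spelling.)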
All examples use Python 3.x; for Python 2, add a u prefix to the strings.
When .encode() works, it converts the string to bytes:
>>> '☃'.encode('UTF-8')
b'\xe2\x98\x83'
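Decoding goes the other way; a trivial round trip (my addition, not in the original):

>>> b'\xe2\x98\x83'.decode('UTF-8')
'☃'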
When an invalid codec is passed, .encode() fails with a LookupError:
>>> '☃'.encode('UTF-garbage')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
LookupError: unknown encoding: UTF-garbage
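Incidentally, str.encode() resolves codec names through the codecs registry, so the same LookupError can be provoked directly; a sketch of mine, assuming only the stdlib codecs module:

>>> import codecs
>>> codecs.lookup('UTF-garbage')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
LookupError: unknown encoding: UTF-garbage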
... usually
>>> '☃'.encode('UTF-^-_-^-_-^8')
b'\xe2\x98\x83'
>>> '☃'.encode('UTF-☃')
b'\xe2\x98\x83'
Why? Because codec lookup normalizes the requested name before consulting the alias table:

>>> import encodings
>>> encodings.normalize_encoding('UTF-^-_-^-_-^8'.lower())
'utf_8'
>>> encodings.normalize_encoding('UTF-☃'.lower())
'utf'
>>> encodings.aliases.aliases['utf']
'utf_8'
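End to end, then, the registry resolves even the snowman spelling to the real codec. One last check of mine (the fine details of lookup normalization changed in later Python 3 releases, so treat this as the behavior of the era):

>>> import codecs
>>> codecs.lookup('UTF-☃').name
'utf-8'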