# Count identifier occurrences
"""
Count how often each identifier occurs in the input and print the most
common ones, sorted by frequency, to stdout.
In this example, the input is this file and the expected output is:
----(Counting 5 most common occurrences of identifier names)----------
3 lexer
3 token
2 Counter
2 alex
2 REGEXPS
"""
from collections import Counter
import alex
# Token rules handed to the lexer as (token name, regular expression) pairs.
# Every pattern is anchored with ``^``; NUM and ID use ``+`` so that neither
# rule can succeed on an empty match.
# NOTE(review): rule order looks significant (STR before NUM, NUM before ID) -
# confirm how alex prioritizes competing rules before reordering.
REGEXPS = (
    # Triple-quoted string literal, optionally f-prefixed, with backslash escapes.
    ("TSTR", r'^f?"""(?:\\.|(?!""").)*?"""|^f?\'\'\'(?:\\.|(?!\'\'\').)*?\'\'\''),
    # String literal in double or single quotes, optionally f-prefixed.
    ("STR", r'^f?"(?:\\.|[^"\\])*"|^f?\'(?:\\.|[^\'\\])*\''),
    # Non-empty run of decimal digits (the class no longer contains '"').
    ("NUM", r"^[0-9]+"),
    # Comment: from ``#`` up to, but not including, the next newline.
    ("REM", "^#[^\n]*"),
    # Non-empty run of ASCII letters, digits and underscores.
    ("ID", r"^[a-zA-Z_0-9]+"),
)
# Words passed to the lexer through its ``keywords`` argument: Python's
# reserved words first, then a handful of extra common names (self, dict,
# set, ...) listed alongside them.
# NOTE(review): presumably alex keeps these out of the "ID" tokens that the
# report counts - confirm against the alex documentation.
KEYWORDS = (
    "False None True and as assert async await break class continue def del "
    "elif else except finally for from global if import in is lambda "
    "nonlocal not or pass raise return try while with yield "
    "self dict set string isinstance args kwargs len"
).split()
def main():
    """Build the lexer, scan the input file and print the identifier report."""
    tokenizer = alex.Alex(
        regexps=REGEXPS,
        keywords=KEYWORDS,
        skip_unrecognized_chars=True,
    )
    scan(tokenizer)
    print_report(tokenizer)
def scan(lexer, filename="count_identifiers_occurrence.py"):
    """Run ``lexer`` over a source file.

    Args:
        lexer: Object providing a ``scan_file(path)`` method.
        filename: Path handed to ``lexer.scan_file``. Defaults to
            ``"count_identifiers_occurrence.py"``, which the module
            docstring describes as this file itself. The default is a
            relative path, so it is presumably resolved against the current
            working directory - pass an explicit path when running from
            elsewhere.
    """
    lexer.scan_file(filename)
def print_report(lexer, show=5):
    """Print the most frequent identifier lexemes found by ``lexer``.

    Args:
        lexer: Object exposing a ``tokens`` iterable whose items carry
            ``name`` and ``lexeme`` attributes; only tokens whose ``name``
            is ``"ID"`` are counted.
        show: How many of the most common identifiers to list (default 5).

    Output goes to stdout: a banner line, then one line per identifier with
    its occurrence count right-aligned in a three-character field.
    """
    print(
        f"----(Counting {show} most common occurrences of identifier names)----------"
    )
    identifier_counts = Counter(
        token.lexeme for token in lexer.tokens if token.name == "ID"
    )
    # most_common() yields (lexeme, count) pairs, highest count first.
    for name, count in identifier_counts.most_common(show):
        print(f"{count:3} {name}")
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()