forked from varia/varia.website
172 lines
6.3 KiB
Python
172 lines
6.3 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
"""
|
||
|
pygments.lexers.stata
|
||
|
~~~~~~~~~~~~~~~~~~~~~
|
||
|
|
||
|
Lexer for Stata
|
||
|
|
||
|
:copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
|
||
|
:license: BSD, see LICENSE for details.
|
||
|
"""
|
||
|
|
||
|
import re
|
||
|
from pygments.lexer import RegexLexer, include, words
|
||
|
from pygments.token import Comment, Keyword, Name, Number, \
|
||
|
String, Text, Operator
|
||
|
|
||
|
from pygments.lexers._stata_builtins import builtins_base, builtins_functions
|
||
|
|
||
|
__all__ = ['StataLexer']
|
||
|
|
||
|
|
||
|
class StataLexer(RegexLexer):
|
||
|
"""
|
||
|
For `Stata <http://www.stata.com/>`_ do files.
|
||
|
|
||
|
.. versionadded:: 2.2
|
||
|
"""
|
||
|
# Syntax based on
|
||
|
# - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
|
||
|
# - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
|
||
|
# - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim
|
||
|
|
||
|
name = 'Stata'
|
||
|
aliases = ['stata', 'do']
|
||
|
filenames = ['*.do', '*.ado']
|
||
|
mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
|
||
|
flags = re.MULTILINE | re.DOTALL
|
||
|
|
||
|
tokens = {
|
||
|
'root': [
|
||
|
include('comments'),
|
||
|
include('strings'),
|
||
|
include('macros'),
|
||
|
include('numbers'),
|
||
|
include('keywords'),
|
||
|
include('operators'),
|
||
|
include('format'),
|
||
|
(r'.', Text),
|
||
|
],
|
||
|
# Comments are a complicated beast in Stata because they can be
|
||
|
# nested and there are a few corner cases with that. See:
|
||
|
# - github.com/kylebarron/language-stata/issues/90
|
||
|
# - statalist.org/forums/forum/general-stata-discussion/general/1448244
|
||
|
'comments': [
|
||
|
(r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
|
||
|
(r'^\s*\*', Comment.Single, 'comments-star'),
|
||
|
(r'/\*', Comment.Multiline, 'comments-block'),
|
||
|
(r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
|
||
|
],
|
||
|
'comments-block': [
|
||
|
(r'/\*', Comment.Multiline, '#push'),
|
||
|
# this ends and restarts a comment block. but need to catch this so
|
||
|
# that it doesn\'t start _another_ level of comment blocks
|
||
|
(r'\*/\*', Comment.Multiline),
|
||
|
(r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
|
||
|
# Match anything else as a character inside the comment
|
||
|
(r'.', Comment.Multiline),
|
||
|
],
|
||
|
'comments-star': [
|
||
|
(r'///.*?\n', Comment.Single,
|
||
|
('#pop', 'comments-triple-slash')),
|
||
|
(r'(^//|(?<=\s)//)(?!/)', Comment.Single,
|
||
|
('#pop', 'comments-double-slash')),
|
||
|
(r'/\*', Comment.Multiline, 'comments-block'),
|
||
|
(r'.(?=\n)', Comment.Single, '#pop'),
|
||
|
(r'.', Comment.Single),
|
||
|
],
|
||
|
'comments-triple-slash': [
|
||
|
(r'\n', Comment.Special, '#pop'),
|
||
|
# A // breaks out of a comment for the rest of the line
|
||
|
(r'//.*?(?=\n)', Comment.Single, '#pop'),
|
||
|
(r'.', Comment.Special),
|
||
|
],
|
||
|
'comments-double-slash': [
|
||
|
(r'\n', Text, '#pop'),
|
||
|
(r'.', Comment.Single),
|
||
|
],
|
||
|
# `"compound string"' and regular "string"; note the former are
|
||
|
# nested.
|
||
|
'strings': [
|
||
|
(r'`"', String, 'string-compound'),
|
||
|
(r'(?<!`)"', String, 'string-regular'),
|
||
|
],
|
||
|
'string-compound': [
|
||
|
(r'`"', String, '#push'),
|
||
|
(r'"\'', String, '#pop'),
|
||
|
(r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
|
||
|
include('macros'),
|
||
|
(r'.', String)
|
||
|
],
|
||
|
'string-regular': [
|
||
|
(r'(")(?!\')|(?=\n)', String, '#pop'),
|
||
|
(r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
|
||
|
include('macros'),
|
||
|
(r'.', String)
|
||
|
],
|
||
|
# A local is usually
|
||
|
# `\w{0,31}'
|
||
|
# `:extended macro'
|
||
|
# `=expression'
|
||
|
# `[rsen](results)'
|
||
|
# `(++--)scalar(++--)'
|
||
|
#
|
||
|
# However, there are all sorts of weird rules wrt edge
|
||
|
# cases. Instead of writing 27 exceptions, anything inside
|
||
|
# `' is a local.
|
||
|
#
|
||
|
# A global is more restricted, so we do follow rules. Note only
|
||
|
# locals explicitly enclosed ${} can be nested.
|
||
|
'macros': [
|
||
|
(r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested'),
|
||
|
(r'\$', Name.Variable.Global, 'macro-global-name'),
|
||
|
(r'`', Name.Variable, 'macro-local'),
|
||
|
],
|
||
|
'macro-local': [
|
||
|
(r'`', Name.Variable, '#push'),
|
||
|
(r"'", Name.Variable, '#pop'),
|
||
|
(r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested'),
|
||
|
(r'\$', Name.Variable.Global, 'macro-global-name'),
|
||
|
(r'.', Name.Variable), # fallback
|
||
|
],
|
||
|
'macro-global-nested': [
|
||
|
(r'\$(\{|(?=[\$`]))', Name.Variable.Global, '#push'),
|
||
|
(r'\}', Name.Variable.Global, '#pop'),
|
||
|
(r'\$', Name.Variable.Global, 'macro-global-name'),
|
||
|
(r'`', Name.Variable, 'macro-local'),
|
||
|
(r'\w', Name.Variable.Global), # fallback
|
||
|
(r'(?!\w)', Name.Variable.Global, '#pop'),
|
||
|
],
|
||
|
'macro-global-name': [
|
||
|
(r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested', '#pop'),
|
||
|
(r'\$', Name.Variable.Global, 'macro-global-name', '#pop'),
|
||
|
(r'`', Name.Variable, 'macro-local', '#pop'),
|
||
|
(r'\w{1,32}', Name.Variable.Global, '#pop'),
|
||
|
],
|
||
|
# Built in functions and statements
|
||
|
'keywords': [
|
||
|
(words(builtins_functions, prefix = r'\b', suffix = r'(?=\()'),
|
||
|
Name.Function),
|
||
|
(words(builtins_base, prefix = r'(^\s*|\s)', suffix = r'\b'),
|
||
|
Keyword),
|
||
|
],
|
||
|
# http://www.stata.com/help.cgi?operators
|
||
|
'operators': [
|
||
|
(r'-|==|<=|>=|<|>|&|!=', Operator),
|
||
|
(r'\*|\+|\^|/|!|~|==|~=', Operator)
|
||
|
],
|
||
|
# Stata numbers
|
||
|
'numbers': [
|
||
|
# decimal number
|
||
|
(r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
|
||
|
Number),
|
||
|
],
|
||
|
# Stata formats
|
||
|
'format': [
|
||
|
(r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
|
||
|
(r'%(21x|16H|16L|8H|8L)', Name.Other),
|
||
|
(r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
|
||
|
(r'%[-~]?\d{1,4}s', Name.Other),
|
||
|
]
|
||
|
}
|