現在のPyPCASTLのソースコード

CodeReposにあげるのとかはどうかと思うので、取りあえずここに張ってみる。
結構大きいので注意(??)

一応PLYというPython Lex Yaccというモジュールが必要ですので、適宜入手してください。どうしたら良いか私には分かりません。
ソースコードを見ていたら気持ちが悪くなったという報告を受けました。その通りです、これは相当に気持ち悪いです。
Pythonレベルでも勿論そうですが、それ以前に無駄があったり定義通りじゃないとかきもい事に成っているのですが、まぁこういうのは出しちゃうのが大事なのでどなたかが新しく実装してくれれば良いんですよね。


後、あれ(forとか)を追加してくれとかいう話が飛ばされたのでforとかを実装しましたが、面倒くさいのでpasteはしません。

import lex
from ast import AST
import sys

tokens = (
    'NUMBER',
    'PLUS','MINUS','TIMES','DIVIDE','EXP',#+,-,*,/,**
    'LPAREN','RPAREN',#()
    'LBRCKT','RBRCKT',#[]
    'LBRACE','RBRACE',#{}
    'DOT',
    'IF','ELSE',
    'FOR',
    'NEWLINE','SEMICOLON','COMMA',
    'EQ','EQ2',#=
    'VAR',
    'FUNCTION','FUNC_PRINT','FUNC_RETURN',"FUNC_INFO",
    'PARENT','CHILDSET',
    'STRING',
)

reserved = {
    'if' : 'IF',
    'else' : 'ELSE',
    'for' : 'FOR',
    'function' : 'FUNCTION',
    'print' : 'FUNC_PRINT',
    'return' : 'FUNC_RETURN',
    'info' : 'FUNC_INFO',
    'parent' : 'PARENT',
    'childset' : 'CHILDSET',
}

t_PLUS   = r'\+'
t_MINUS  = r'-' 
t_EXP    = r'\*\*'
t_TIMES  = r'\*'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRCKT = r'\['
t_RBRCKT = r'\]'
t_LBRACE = r'{'
t_RBRACE = r'}'
t_SEMICOLON = r'\;'
t_COMMA  = r','
t_DOT    = r'\.'
t_EQ     = r'='
t_EQ2    = r'=='

t_ignore = ' \t'

'''
lex ID or VAR
'''
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value,'VAR')
    return t
'''
lex Number
'''
def t_NUMBER(t):
    r'\d+'
    try:
        t.value = int(t.value)
    except ValueError:
        print "Line %d: Number %s is too large!" % (t.lineto,t.value)
        t.value = 0
    return t

def t_STRING(t):
    r'\"[a-zA-Z_ ,!]*\"'
    return t

def t_newline(t):
    r'\n+'
    t.lineno += len(t.value)
    
def t_error(t):
    print "Illegal character '%s'" % t.value[0]
    t.skip(1)

import yacc
import operator #for method-type operator

'''
it have self value
'''
end_token = (
    "Number",
    "VAR_ref",
    "VAR_assigned",
    "VAR_arg",
    "Function_call",
)

ignore_token = (
)

class Node:
    def __init__(self,type,childset=None,cur=None):
        self.type = type
        if childset:
            self.childset = childset
        else:
            self.childset = []
        self.cur = cur

def str2ope(str):
    if str=='+':return operator.add
    elif str=='-':return operator.sub
    elif str=='*':return operator.mul
    elif str=='/':return operator.div
    elif str=='**':return operator.pow

precedence = (
    ('right','EQ2'),
    ('left','PLUS','MINUS'),
    ('left','TIMES','DIVIDE'),
    ('right','EXP'),
)

def p_program_stmt(p):
    '''program : statementlist'''
    p[0] = Node("programlist",p[1])

def p_statement_statelist(p):
    '''statement : LBRACE statementlist RBRACE'''
    p[0] = Node("statementlist",p[2])

def p_statement_exp(p):
    '''statement : exp'''
    p[0] = p[1]
def p_statement_selection(p):
    '''statement : selection_statement'''
    p[0] = p[1]
def p_statement_iteration(p):
    '''statement : iteration_statement'''
    p[0] = p[1]

def p_exp_binop(p):
    '''exp : exp PLUS exp
           | exp MINUS exp
           | exp TIMES exp
           | exp DIVIDE exp
           | exp EXP exp
           | exp EQ2 exp'''
    p[0] = Node("binop",[p[1],p[3]],p[2])
def p_exp_assign(p):
    '''exp : symbol EQ exp'''
    if p[1].type == "VAR_ref":
        p[1].type = "VAR_will_assign"
    p[0] = Node("binop=",[p[1],p[3]],"=")
def p_exp_symbol(p):
    '''exp : symbol'''
    p[0] = p[1]
def p_exp_str(p):
    '''exp : STRING'''
    p[0] = Node("String",[],p[1])
def p_exp_NUM(p):
    '''exp : NUMBER'''
    p[0] = Node("Number",[],p[1])
def p_exp_functiondef(p):
    '''exp : FUNCTION LPAREN paramlist RPAREN statement'''
    p[0] = Node("Function_def",[p[3],p[5]])
def p_exp_functioncall(p):
    '''exp : VAR         LPAREN arglist RPAREN
           | FUNC_PRINT  LPAREN arglist RPAREN
           | FUNC_RETURN LPAREN arglist RPAREN
           | FUNC_INFO   LPAREN arglist RPAREN'''
    left = Node("func_name",[],p[1])
    p[0] = Node("Function_call",[left,p[3]])

def p_paramlist_empty(p):
    '''paramlist : empty'''
    p[0] = Node("paramlist",[])
def p_paramlist_var(p):
    '''paramlist : VAR psublist'''
    para_name = Node("para_name",[],p[1])
    p[0] = Node("paramlist",[ para_name ]+p[2])
def p_psublist(p):
    '''psublist : COMMA VAR psublist'''
    para_name = Node("para_name",[],p[2])
    p[0] = [ para_name ] + p[3]
def p_psublist_empty(p):
    '''psublist : empty'''
    p[0] = []

def p_arglist_empty(p):
    '''arglist : empty'''
    p[0] = Node("arglist",[])
def p_arglist_var(p):
    '''arglist : exp asublist'''
    p[0] = Node("arglist",[ p[1] ]+p[2])
def p_asublist(p):
    '''asublist : COMMA exp asublist'''
    p[0] = [ p[2] ] + p[3]
def p_asublist_empty(p):
    '''asublist : empty'''
    p[0] = []
    
def p_symbol_var(p):
    '''symbol : VAR'''
    p[0] = Node("VAR_ref",[],p[1])
def p_symbol_var_dotlist(p):
    '''symbol : VAR DOT dotlist'''
    p[0] = Node("VAR_dotlist",[],[p[1],p[3]])

def p_dotlist_parent(p):
    '''dotlist : PARENT'''
    p[0] = Node("parent",[])
def p_dotlist_childset(p):
    '''dotlist : CHILDSET LBRCKT exp RBRCKT'''
    p[0] = Node("child",[],p[3])
def p_dotliset_parents(p):
    '''dotlist : PARENT DOT dotlist'''
    p[0] = Node("parents",[],p[3])
def p_dotlist_childsets(p):
    '''dotlist : CHILDSET LBRCKT exp RBRCKT DOT dotlist'''
    p[0] = Node("childset",[],[p[3],p[6]])
   
def p_selection_statement(p):
    '''selection_statement : IF LPAREN exp RPAREN statement'''
    p[0] = Node("IfStmt",[p[3],p[5]])

def p_iteration_statement(p):
    '''iteration_statement : FOR LPAREN exp SEMICOLON exp SEMICOLON exp RPAREN statement'''
    p[0] = Node("ForStmt",[p[3],p[5],p[7],p[9]])

def p_statementlist_stmt(p):
    '''statementlist : statement'''
    p[0] = [p[1]]
def p_statementlist_stmtlist(p):
    '''statementlist : statementlist statement'''
    p[0] = p[1]+[p[2]]
def p_statementlist_stmtlist_semicolon(p):
    '''statementlist : statementlist SEMICOLON statement'''
    p[0] = p[1]+[p[3]]

def p_empty(p):
    '''empty :'''
    p[0] = Node("Empty",[],None)

def p_error(p):
    exit( "Syntax error in input!" )

lexer = lex.lex()
yacc.yacc()
ast = AST()

def main():
    if len(sys.argv) != 2:
        exit("python pcastl.py source")
    filename = sys.argv[1]
    s = open(filename).read()
    print s
    result = yacc.parse(s)
    ast.build2(result)
    #ast.visitor()
    ast.eval()

if __name__ == "__main__":
    main()
#ast.py
#-*- coding: utf-8 -*-
import copy,types,operator


reserved = {
    'if' : 'IF',
    'else' : 'ELSE',
    'for' : 'FOR',
    'function' : 'FUNCTION',
    'print' : 'FUNC_PRINT',
    'return' : 'FUNC_RETURN',
    'info' : 'FUNC_INFO',
}

def str2ope(str):
    if str=='+':return operator.add
    elif str=='-':return operator.sub
    elif str=='*':return operator.mul
    elif str=='/':return operator.div
    elif str=='**':return operator.pow
    elif str=='==':return operator.eq

class AST:
    class AST_node:
        def __init__(self,type,cur,parent,childset):
            self.type = type
            self.cur = cur
            self.parent = parent
            self.childset = childset
        def print_info(self):
            print "type:%s cur:%s parent:%s childset:%s"%(self.type,self.cur,self.parent,self.childset)

    class Frame:
        def __init__(self,frame):
            self.frames = [frame]
        def push_frame(self,frame):
            self.frames.append(frame)
        def pop_frame(self):
            self.frames.pop()
        def set_value(self,key,value):
            idx = len(self.frames)-1
            while idx>=0:
                if self.frames[idx].has_key(key):
                    self.frames[idx][key] = value
                    break
                idx -= 1
            if idx==-1:
                self.frames[-1][key] = value
        def let_value(self,key,value):
            self.local_frame()[key] = value
        def get_value(self,value):
            idx = len(self.frames)-1
            while idx>=0:
                if self.frames[idx].has_key(value):
                    return self.frames[idx][value]
                idx -= 1
            return None
        def global_frame(self):
            return self.frames[0]
        def local_frame(self):
            return self.frames[-1]
                

    class Closure:
        def __init__(self,var,body,env,ast_node):#virtual_arg,function_body,frame
            self.type = "Closure"
            self.var = var
            self.body = body
            self.env = env
            self.ast_node = ast_node

    class Function:
        def __init__(self,arg,body):
            self.type = "Function_class"
            self.arg = arg
            self.body = body
            self.real_arg = None

    def __init__(self,trees=[]):
        self.trees = trees
        self.ast_trees = []
        self.global_frame = self.Frame({})

    def set(self,trees):
        self.trees = trees

    def clear(self):
        self.ast_trees = []

    def build(self,trees,parent=None):
        self.ast_trees.append(self.AST_node(trees.type,trees.cur,parent,trees.childset))
        if not(len(trees.childset)==0):
            for child in trees.childset:
                self.build(child,trees)

    def build2(self,node,parent=None):
        this_node = self.AST_node(node.type,node.cur,parent,[])
        self.ast_trees.append(this_node)
        if not (parent is None):
            parent.childset.append(this_node)
        if not(len(node.childset)==0):
            for child in node.childset:
                self.build2(child,this_node)

    def visitor(self):
        for node in self.ast_trees:
            node.print_info()
            
    def eval(self):
        self.eval_node(self.global_frame,self.ast_trees[0])
            
    def eval_node(self,env,node):
        if node.type == "programlist":
            for n in node.childset:
                self.eval_node(env,n)
        elif node.type == "statementlist":
            env.push_frame({})
            for n in node.childset:
                ret = self.eval_node(env,n)
                if not(ret is None):
                    env.pop_frame()
                    return ret
            env.pop_frame()
            return ret
        elif node.type == "binop":
            return self.eval_binop(env,node)
        elif node.type == "binop=":
            return self.eval_assign(env,node)
        elif node.type == "Number":
            return self.eval_number(env,node)
        elif node.type == "String":
            return self.eval_string(env,node)
        elif node.type == "VAR_will_assign":
            return node.cur
        elif node.type == "VAR_ref":
            return env.get_value(node.cur)
        elif node.type == "Function_call":
            ret =  self.eval_app(env,node)
            return ret
        elif node.type == "arglist":
            return self.eval_arglist(env,node)
        elif node.type == "Function_def":
            return self.Closure(node.childset[0],node.childset[1],copy.deepcopy(env),
                                node)
        elif node.type == "IfStmt":
            return self.eval_if_stmt(env,node)
        elif node.type == "VAR_dotlist":
            ret = self.eval_dotlist(env,node)
            return ret

    def eval_dotlist(self,env,node):
        touched_node = env.get_value(node.cur[0])
        target = self.AST_node(None,None,None,None)
        if touched_node.type == "Closure":
            target.type = "Closure"
            target.cur = None
            target.parent = touched_node.ast_node.parent
            target.childset = touched_node.ast_node.childset
        else:
            target = node
        ret = self.eval_dotlist_core(env,target,node.cur[1])
        return ret

    def eval_dotlist_core(self,env,target,dots):
        if dots.type == "parent":
            ret = target.parent
        elif dots.type == "parents":
            tmp = target.parent
            ret = self.eval_dotlist_core(env,tmp,dots.cur)
        elif dots.type == "child":
            idx = self.eval_node(env,dots.cur)
            ret = target.childset[idx]
        elif dots.type == "childset":
            idx = self.eval_node(env,dots.cur[0])
            tmp = target.childset[idx]
            ret = self.eval_dotlist_core(env,tmp,dots.cur[1])
        return ret

    def eval_binop(self,env,node):
        left = self.eval_node(env,node.childset[0])
        right = self.eval_node(env,node.childset[1])
        ret = str2ope(node.cur)(left,right)
        return ret

    def eval_assign(self,env,node):
        left = self.eval_node(env,node.childset[0])
        right = self.eval_node(env,node.childset[1])
        try:
            if not(left.type == None):
                tmp = self.AST_node(node.childset[1].type,node.childset[1].cur,
                                    left.parent,node.childset[1].childset)
                for i in range(0,len(left.parent.childset)):
                    if left.parent.childset[i] == left:
                        left.parent.childset[i] = tmp
                        break
                    left = tmp
        except:
            env.set_value(left,right)
        return None

    def eval_number(self,env,node):
        return int(node.cur)

    def eval_string(self,env,node):
        return node.cur
    
    def eval_var(self,env,node):
        return env.get_value(node.cur)

    #node.childset[0].cur = func_name
    #node.childset[1] = paramlist
    #func_name(real_arg);
    def eval_app(self,env,node):
        if not (node.childset[0].cur in reserved):
            real_args = self.eval_node(env,node.childset[1])
            ret = self.eval_app_userfunc(env,node,real_args)
            return ret
        elif (node.childset[0].cur == "print"):
            self.eval_app_print(env,node.childset[1])
        elif (node.childset[0].cur == "return"):
            return self.eval_app_return(env,node.childset[1])
        elif (node.childset[0].cur == "info"):
            self.eval_app_info(env,node.childset[1])

    def eval_app_print(self,env,args):
        if(len(args.childset)==1):
            arg = self.eval_node(env,args.childset[0])
            print "print:",arg
        else:
            exit("print function's argnum is incorrect")

    def eval_app_return(self,env,args):
        if(len(args.childset)==1):
            ret = self.eval_node(env,args.childset[0])
            return ret
        else:
            exit("return function's argnum is incorrect")
    def eval_app_info(self,env,args):
        if(len(args.childset)==1):
            val = self.eval_node(env,args.childset[0])
            print "info:para_type(",args.childset[0].type,") real_type(",val.type,")"
            print "value:",val.cur
        else:
            exit("info function's argnum is incorrect")
            
    def eval_app_userfunc(self,env,node,real_args):
        closure = env.get_value(node.childset[0].cur)
        if closure is None:
            closure = self.global_frame.frames[0][node.childset[0].cur]
        elif closure.type == "Function_call":
            return self.eval_app(env,closure)
        if closure is None:
            exit("miss")
        closure.env.push_frame({})#u'ローカルフレーム用'
        if closure.var.type is "paramlist":
            if not(real_args is None):
                for i in range(0,len(real_args)):
                    closure.env.let_value(closure.var.childset[i].cur,real_args[i])
        ret = self.eval_node(closure.env,closure.body)
        closure.env.pop_frame()
        return ret
    
    def eval_arglist(self,env,node):
        if len(node.childset) == 0:
            return None
        else :
            ret = []
            for n in node.childset:
                ret.append(self.eval_node(env,n))
            return ret

    def eval_func_class(self,env,node):#node:Func
        self.global_assign[node.arg.childset[0]] = node.real_arg
        node.arg.type = "VAR_ref"
        _arg = self.eval_node(node.arg)
        self.eval_node(node.body)

    def eval_if_stmt(self,env,node):
        cond = self.eval_node(env,node.childset[0])
        if cond != 0:
            return self.eval_node(env,node.childset[1])
        return None