現在のPyPCASTLのソースコード
CodeReposにあげるのとかはどうかと思うので、取りあえずここに張ってみる。
結構大きいので注意(??)
一応PLYというPython Lex Yaccというモジュールが必要ですので、適宜入手してください。どうしたら良いか私には分かりません。
ソースコードを見ていたら気持ちが悪くなったという報告を受けました。その通りです、これは相当に気持ち悪いです。
Pythonレベルでも勿論そうですが、それ以前に無駄があったり定義通りじゃないとかきもい事に成っているのですが、まぁこういうのは出しちゃうのが大事なのでどなたかが新しく実装してくれれば良いんですよね。
後、あれ(forとか)を追加してくれとかいう話が飛ばされたのでforとかを実装しましたが、面倒くさいのでpasteはしません。
import lex from ast import AST import sys tokens = ( 'NUMBER', 'PLUS','MINUS','TIMES','DIVIDE','EXP',#+,-,*,/,** 'LPAREN','RPAREN',#() 'LBRCKT','RBRCKT',#[] 'LBRACE','RBRACE',#{} 'DOT', 'IF','ELSE', 'FOR', 'NEWLINE','SEMICOLON','COMMA', 'EQ','EQ2',#= 'VAR', 'FUNCTION','FUNC_PRINT','FUNC_RETURN',"FUNC_INFO", 'PARENT','CHILDSET', 'STRING', ) reserved = { 'if' : 'IF', 'else' : 'ELSE', 'for' : 'FOR', 'function' : 'FUNCTION', 'print' : 'FUNC_PRINT', 'return' : 'FUNC_RETURN', 'info' : 'FUNC_INFO', 'parent' : 'PARENT', 'childset' : 'CHILDSET', } t_PLUS = r'\+' t_MINUS = r'-' t_EXP = r'\*\*' t_TIMES = r'\*' t_DIVIDE = r'/' t_LPAREN = r'\(' t_RPAREN = r'\)' t_LBRCKT = r'\[' t_RBRCKT = r'\]' t_LBRACE = r'{' t_RBRACE = r'}' t_SEMICOLON = r'\;' t_COMMA = r',' t_DOT = r'\.' t_EQ = r'=' t_EQ2 = r'==' t_ignore = ' \t' ''' lex ID or VAR ''' def t_ID(t): r'[a-zA-Z_][a-zA-Z_0-9]*' t.type = reserved.get(t.value,'VAR') return t ''' lex Number ''' def t_NUMBER(t): r'\d+' try: t.value = int(t.value) except ValueError: print "Line %d: Number %s is too large!" % (t.lineto,t.value) t.value = 0 return t def t_STRING(t): r'\"[a-zA-Z_ ,!]*\"' return t def t_newline(t): r'\n+' t.lineno += len(t.value) def t_error(t): print "Illegal character '%s'" % t.value[0] t.skip(1) import yacc import operator #for method-type operator ''' it have self value ''' end_token = ( "Number", "VAR_ref", "VAR_assigned", "VAR_arg", "Function_call", ) ignore_token = ( ) class Node: def __init__(self,type,childset=None,cur=None): self.type = type if childset: self.childset = childset else: self.childset = [] self.cur = cur def str2ope(str): if str=='+':return operator.add elif str=='-':return operator.sub elif str=='*':return operator.mul elif str=='/':return operator.div elif str=='**':return operator.pow precedence = ( ('right','EQ2'), ('left','PLUS','MINUS'), ('left','TIMES','DIVIDE'), ('right','EXP'), ) def p_program_stmt(p): '''program : statementlist''' p[0] = Node("programlist",p[1]) def p_statement_statelist(p): '''statement : LBRACE statementlist RBRACE''' p[0] = Node("statementlist",p[2]) def p_statement_exp(p): '''statement : exp''' p[0] = p[1] def p_statement_selection(p): '''statement : selection_statement''' p[0] = p[1] def p_statement_iteration(p): '''statement : iteration_statement''' p[0] = p[1] def p_exp_binop(p): '''exp : exp PLUS exp | exp MINUS exp | exp TIMES exp | exp DIVIDE exp | exp EXP exp | exp EQ2 exp''' p[0] = Node("binop",[p[1],p[3]],p[2]) def p_exp_assign(p): '''exp : symbol EQ exp''' if p[1].type == "VAR_ref": p[1].type = "VAR_will_assign" p[0] = Node("binop=",[p[1],p[3]],"=") def p_exp_symbol(p): '''exp : symbol''' p[0] = p[1] def p_exp_str(p): '''exp : STRING''' p[0] = Node("String",[],p[1]) def p_exp_NUM(p): '''exp : NUMBER''' p[0] = Node("Number",[],p[1]) def p_exp_functiondef(p): '''exp : FUNCTION LPAREN paramlist RPAREN statement''' p[0] = Node("Function_def",[p[3],p[5]]) def p_exp_functioncall(p): '''exp : VAR LPAREN arglist RPAREN | FUNC_PRINT LPAREN arglist RPAREN | FUNC_RETURN LPAREN arglist RPAREN | FUNC_INFO LPAREN arglist RPAREN''' left = Node("func_name",[],p[1]) p[0] = Node("Function_call",[left,p[3]]) def p_paramlist_empty(p): '''paramlist : empty''' p[0] = Node("paramlist",[]) def p_paramlist_var(p): '''paramlist : VAR psublist''' para_name = Node("para_name",[],p[1]) p[0] = Node("paramlist",[ para_name ]+p[2]) def p_psublist(p): '''psublist : COMMA VAR psublist''' para_name = Node("para_name",[],p[2]) p[0] = [ para_name ] + p[3] def p_psublist_empty(p): '''psublist : empty''' p[0] = [] def p_arglist_empty(p): '''arglist : empty''' p[0] = Node("arglist",[]) def p_arglist_var(p): '''arglist : exp asublist''' p[0] = Node("arglist",[ p[1] ]+p[2]) def p_asublist(p): '''asublist : COMMA exp asublist''' p[0] = [ p[2] ] + p[3] def p_asublist_empty(p): '''asublist : empty''' p[0] = [] def p_symbol_var(p): '''symbol : VAR''' p[0] = Node("VAR_ref",[],p[1]) def p_symbol_var_dotlist(p): '''symbol : VAR DOT dotlist''' p[0] = Node("VAR_dotlist",[],[p[1],p[3]]) def p_dotlist_parent(p): '''dotlist : PARENT''' p[0] = Node("parent",[]) def p_dotlist_childset(p): '''dotlist : CHILDSET LBRCKT exp RBRCKT''' p[0] = Node("child",[],p[3]) def p_dotliset_parents(p): '''dotlist : PARENT DOT dotlist''' p[0] = Node("parents",[],p[3]) def p_dotlist_childsets(p): '''dotlist : CHILDSET LBRCKT exp RBRCKT DOT dotlist''' p[0] = Node("childset",[],[p[3],p[6]]) def p_selection_statement(p): '''selection_statement : IF LPAREN exp RPAREN statement''' p[0] = Node("IfStmt",[p[3],p[5]]) def p_iteration_statement(p): '''iteration_statement : FOR LPAREN exp SEMICOLON exp SEMICOLON exp RPAREN statement''' p[0] = Node("ForStmt",[p[3],p[5],p[7],p[9]]) def p_statementlist_stmt(p): '''statementlist : statement''' p[0] = [p[1]] def p_statementlist_stmtlist(p): '''statementlist : statementlist statement''' p[0] = p[1]+[p[2]] def p_statementlist_stmtlist_semicolon(p): '''statementlist : statementlist SEMICOLON statement''' p[0] = p[1]+[p[3]] def p_empty(p): '''empty :''' p[0] = Node("Empty",[],None) def p_error(p): exit( "Syntax error in input!" ) lexer = lex.lex() yacc.yacc() ast = AST() def main(): if len(sys.argv) != 2: exit("python pcastl.py source") filename = sys.argv[1] s = open(filename).read() print s result = yacc.parse(s) ast.build2(result) #ast.visitor() ast.eval() if __name__ == "__main__": main()
#ast.py #-*- coding: utf-8 -*- import copy,types,operator reserved = { 'if' : 'IF', 'else' : 'ELSE', 'for' : 'FOR', 'function' : 'FUNCTION', 'print' : 'FUNC_PRINT', 'return' : 'FUNC_RETURN', 'info' : 'FUNC_INFO', } def str2ope(str): if str=='+':return operator.add elif str=='-':return operator.sub elif str=='*':return operator.mul elif str=='/':return operator.div elif str=='**':return operator.pow elif str=='==':return operator.eq class AST: class AST_node: def __init__(self,type,cur,parent,childset): self.type = type self.cur = cur self.parent = parent self.childset = childset def print_info(self): print "type:%s cur:%s parent:%s childset:%s"%(self.type,self.cur,self.parent,self.childset) class Frame: def __init__(self,frame): self.frames = [frame] def push_frame(self,frame): self.frames.append(frame) def pop_frame(self): self.frames.pop() def set_value(self,key,value): idx = len(self.frames)-1 while idx>=0: if self.frames[idx].has_key(key): self.frames[idx][key] = value break idx -= 1 if idx==-1: self.frames[-1][key] = value def let_value(self,key,value): self.local_frame()[key] = value def get_value(self,value): idx = len(self.frames)-1 while idx>=0: if self.frames[idx].has_key(value): return self.frames[idx][value] idx -= 1 return None def global_frame(self): return self.frames[0] def local_frame(self): return self.frames[-1] class Closure: def __init__(self,var,body,env,ast_node):#virtual_arg,function_body,frame self.type = "Closure" self.var = var self.body = body self.env = env self.ast_node = ast_node class Function: def __init__(self,arg,body): self.type = "Function_class" self.arg = arg self.body = body self.real_arg = None def __init__(self,trees=[]): self.trees = trees self.ast_trees = [] self.global_frame = self.Frame({}) def set(self,trees): self.trees = trees def clear(self): self.ast_trees = [] def build(self,trees,parent=None): self.ast_trees.append(self.AST_node(trees.type,trees.cur,parent,trees.childset)) if not(len(trees.childset)==0): for child in trees.childset: self.build(child,trees) def build2(self,node,parent=None): this_node = self.AST_node(node.type,node.cur,parent,[]) self.ast_trees.append(this_node) if not (parent is None): parent.childset.append(this_node) if not(len(node.childset)==0): for child in node.childset: self.build2(child,this_node) def visitor(self): for node in self.ast_trees: node.print_info() def eval(self): self.eval_node(self.global_frame,self.ast_trees[0]) def eval_node(self,env,node): if node.type == "programlist": for n in node.childset: self.eval_node(env,n) elif node.type == "statementlist": env.push_frame({}) for n in node.childset: ret = self.eval_node(env,n) if not(ret is None): env.pop_frame() return ret env.pop_frame() return ret elif node.type == "binop": return self.eval_binop(env,node) elif node.type == "binop=": return self.eval_assign(env,node) elif node.type == "Number": return self.eval_number(env,node) elif node.type == "String": return self.eval_string(env,node) elif node.type == "VAR_will_assign": return node.cur elif node.type == "VAR_ref": return env.get_value(node.cur) elif node.type == "Function_call": ret = self.eval_app(env,node) return ret elif node.type == "arglist": return self.eval_arglist(env,node) elif node.type == "Function_def": return self.Closure(node.childset[0],node.childset[1],copy.deepcopy(env), node) elif node.type == "IfStmt": return self.eval_if_stmt(env,node) elif node.type == "VAR_dotlist": ret = self.eval_dotlist(env,node) return ret def eval_dotlist(self,env,node): touched_node = env.get_value(node.cur[0]) target = self.AST_node(None,None,None,None) if touched_node.type == "Closure": target.type = "Closure" target.cur = None target.parent = touched_node.ast_node.parent target.childset = touched_node.ast_node.childset else: target = node ret = self.eval_dotlist_core(env,target,node.cur[1]) return ret def eval_dotlist_core(self,env,target,dots): if dots.type == "parent": ret = target.parent elif dots.type == "parents": tmp = target.parent ret = self.eval_dotlist_core(env,tmp,dots.cur) elif dots.type == "child": idx = self.eval_node(env,dots.cur) ret = target.childset[idx] elif dots.type == "childset": idx = self.eval_node(env,dots.cur[0]) tmp = target.childset[idx] ret = self.eval_dotlist_core(env,tmp,dots.cur[1]) return ret def eval_binop(self,env,node): left = self.eval_node(env,node.childset[0]) right = self.eval_node(env,node.childset[1]) ret = str2ope(node.cur)(left,right) return ret def eval_assign(self,env,node): left = self.eval_node(env,node.childset[0]) right = self.eval_node(env,node.childset[1]) try: if not(left.type == None): tmp = self.AST_node(node.childset[1].type,node.childset[1].cur, left.parent,node.childset[1].childset) for i in range(0,len(left.parent.childset)): if left.parent.childset[i] == left: left.parent.childset[i] = tmp break left = tmp except: env.set_value(left,right) return None def eval_number(self,env,node): return int(node.cur) def eval_string(self,env,node): return node.cur def eval_var(self,env,node): return env.get_value(node.cur) #node.childset[0].cur = func_name #node.childset[1] = paramlist #func_name(real_arg); def eval_app(self,env,node): if not (node.childset[0].cur in reserved): real_args = self.eval_node(env,node.childset[1]) ret = self.eval_app_userfunc(env,node,real_args) return ret elif (node.childset[0].cur == "print"): self.eval_app_print(env,node.childset[1]) elif (node.childset[0].cur == "return"): return self.eval_app_return(env,node.childset[1]) elif (node.childset[0].cur == "info"): self.eval_app_info(env,node.childset[1]) def eval_app_print(self,env,args): if(len(args.childset)==1): arg = self.eval_node(env,args.childset[0]) print "print:",arg else: exit("print function's argnum is incorrect") def eval_app_return(self,env,args): if(len(args.childset)==1): ret = self.eval_node(env,args.childset[0]) return ret else: exit("return function's argnum is incorrect") def eval_app_info(self,env,args): if(len(args.childset)==1): val = self.eval_node(env,args.childset[0]) print "info:para_type(",args.childset[0].type,") real_type(",val.type,")" print "value:",val.cur else: exit("info function's argnum is incorrect") def eval_app_userfunc(self,env,node,real_args): closure = env.get_value(node.childset[0].cur) if closure is None: closure = self.global_frame.frames[0][node.childset[0].cur] elif closure.type == "Function_call": return self.eval_app(env,closure) if closure is None: exit("miss") closure.env.push_frame({})#u'ローカルフレーム用' if closure.var.type is "paramlist": if not(real_args is None): for i in range(0,len(real_args)): closure.env.let_value(closure.var.childset[i].cur,real_args[i]) ret = self.eval_node(closure.env,closure.body) closure.env.pop_frame() return ret def eval_arglist(self,env,node): if len(node.childset) == 0: return None else : ret = [] for n in node.childset: ret.append(self.eval_node(env,n)) return ret def eval_func_class(self,env,node):#node:Func self.global_assign[node.arg.childset[0]] = node.real_arg node.arg.type = "VAR_ref" _arg = self.eval_node(node.arg) self.eval_node(node.body) def eval_if_stmt(self,env,node): cond = self.eval_node(env,node.childset[0]) if cond != 0: return self.eval_node(env,node.childset[1]) return None