K&R Cの付録Aの文法(ANSI Cの文法?)で直接左再帰除去で対応出来ない左再帰な非終端記号があるかどうか調べる

import qualified Data.Set as Set
import qualified Data.List as List

data EBNF =
    Seq EBNF EBNF |
    Choice EBNF EBNF |
    Opt EBNF |
    Terminal String | -- ただし空文字列禁止
    NonTerminal NT
    deriving (Show , Eq , Ord)
    
{- 手間なので、型シノニムで -}
type HandleEmpty = Bool {- True = Accept , False = Reject -}
{-
Acceptは、その式が空文字列を受理出来るという事をあらわす
一方Rejectは、その式は空文字列を受理しないという事をあらわす
-}
             
{-
与えられたEBNFの部分式すべてからなる集合を求める。
自分自身も部分式であるとする。
-}
subexps :: EBNF -> Set.Set EBNF
subexps s@(Seq a b) = Set.insert s (Set.union (subexps a) (subexps b))
subexps s@(Choice a b) = Set.insert s (Set.union (subexps a) (subexps b))
subexps s@(Opt a) = Set.insert s (subexps a)
subexps s@(Terminal str) = Set.singleton s
subexps s@(NonTerminal nt) = Set.singleton s

{-
こいつをぶん回す。
accept_setは現段階で判明している、空文字列を受理する式
reject_setは現段階で判明している、空文字列を受理しない式
-}
data Handles = MkHandle { accept_set :: Set.Set EBNF , reject_set :: Set.Set EBNF }
             deriving (Show , Eq)

{-
現在分かっていること(=Handles)から、
今注目しているEBNFの式が、空文字列を受理するか、しないかを調べて更新
-}
deriv :: EBNF -> Handles -> Handles
deriv exp@(Seq a b) self@(MkHandle accept reject)
    | (Set.member a reject || Set.member b reject) = MkHandle accept (Set.insert exp reject)
    | (Set.member a accept && Set.member b accept) = MkHandle (Set.insert exp accept) reject
    | otherwise = self
deriv exp@(Choice a b) self@(MkHandle accept reject)
    | (Set.member a accept || Set.member b accept) = MkHandle (Set.insert exp accept) reject
    | (Set.member a reject && Set.member b reject) = MkHandle accept (Set.insert exp reject)
    | otherwise = self
deriv exp@(Opt e) (MkHandle accept reject) = MkHandle (Set.insert exp accept) reject
deriv exp@(Terminal str) (MkHandle accept reject) = MkHandle accept (Set.insert exp reject)
deriv exp@(NonTerminal nt) self@(MkHandle accept reject)
    | Set.member (grammer nt) accept = MkHandle (Set.insert exp accept) reject
    | Set.member (grammer nt) reject = MkHandle accept (Set.insert exp reject)
    | otherwise = self

all_nt :: [NT]
all_nt = enumFrom (toEnum 0)

{-
grammber_subexpsは、
{ 文法中の非終端記号 } u { 各非終端記号の生成規則に対する部分式からなる集合 }
-}
grammer_subexps = Set.union
                  (foldr (\e acc -> Set.union (subexps $ grammer e) acc) Set.empty all_nt)
                  (Set.fromList $ map NonTerminal all_nt)

{-
fixpointで判別付かなかった非終端記号に関しては、
その非終端記号から、
空文字列の受理をする、もしくは受理しないといった部分式に到達しない
無限回の導出が出来るため、左再帰を求める計算の上ではRejectと見ても良い。

計算的な意味としては、入力に対する無限の導出となって停止しない為、
後に出てくるleft cornerの考え的にはRejectと言えるということになります。

例えば、
S = A S
A = B
B = A
この例だとA,B,S全ての非終端記号の判断はつきません。
なので、全て空文字列を受理しないとして扱います。
-}
grammer_handles =
    foldr (\e self@(MkHandle accept reject) ->                             
               case Set.member e accept ||
                    Set.member e reject of
                 True -> self
                 False -> MkHandle accept (Set.insert e reject)) res (map NonTerminal all_nt)
  where
    {-
      fixpointは、文法中に出現する全ての部分式について、
      それが空文字列を受理するか受理しないかを分類する計算。
      
      かならず停止する。
     -}
    fixpoint cur =
        case cur == res of
          True -> res
          False -> fixpoint res
        where
          res = Set.fold deriv cur grammer_subexps
    
    res  = fixpoint (MkHandle Set.empty Set.empty)


(./) a b = Choice a b
(.>) a b = Seq a b
op a = Opt (NonTerminal a)
opt a = Opt a
t a = Terminal a
nt a = NonTerminal a

infixr 1 ./
infixr 2 .>

direct_left_recursion :: NT -> [NT]
direct_left_recursion e = collect $ grammer e
    where
      collect :: EBNF -> [NT]
      collect (Seq (Opt a) b) = (collect a) ++ (collect b)
      collect (Seq a b) = (collect a)
      collect (Choice a b) = (collect a) ++ (collect b)
      collect (Opt e) = []
      collect (Terminal _) = []
      collect (NonTerminal nt) | e == nt = [nt]
                               | otherwise = []

{-
ある非終端記号に対して、その生成規則に注目する

その生成規則のうち、入力列の頭にぶつかる可能性のある非終端記号をかき集めてくる

もしこの中に自分自身が含まれる、すなわち
A ∈ direct_left_corner(A)
ならば、Aは左再帰である(直接左再帰とは限らない)
-}
left_corner :: NT -> [NT]
left_corner = lc . grammer
    where
      lc :: EBNF -> [NT]
      lc (Seq a b)
          | Set.member a (accept_set grammer_handles) = (lc a) ++ (lc b)
          | otherwise = lc a
      lc (Choice a b) = (lc a) ++ (lc b)
      lc (Opt e) = lc e
      lc (Terminal _) = []
      lc (NonTerminal nt) = [nt]

collect_left_corner :: Set.Set NT -> Set.Set NT
collect_left_corner s =
  Set.fold (\e acc -> Set.union (Set.fromList $ left_corner e) acc) Set.empty s

mutual_left_corner :: NT -> Set.Set NT
mutual_left_corner nt =
  loop (Set.fromList $ foldr List.delete (left_corner nt) (direct_left_recursion nt))
    where
      loop s =
          case res == s of
            True -> res
            False -> loop res
        where
          res = Set.union s (collect_left_corner s)

is_direct_left_recursion nt = List.elem nt (direct_left_recursion nt)
is_mutually_left_recursion nt = Set.member nt (mutual_left_corner nt)

data NT =
    Translation_unit |
    External_declaration |
    Function_definition |
    Declaration |
    Declaration_list |
    Declaration_specifiers |
    Storage_class_specifier |
    Type_specifier |
    Type_qualifier |
    Struct_or_union_specifier |
    Struct_or_union |
    Struct_declaration_list |
    Init_declarator_list |
    Init_declarator |
    Struct_declaration |
    Specifier_qualifier_list |
    Struct_declarator_list |
    Struct_declarator |
    Enum_specifier |
    Enumerator_list |
    Enumerator |
    Declarator |
    Direct_declarator |
    Pointer |
    Type_qualifier_list |
    Parameter_type_list |
    Parameter_list |
    Parameter_declaration |
    Identifier_list |
    Initializer |
    Initializer_list |
    Type_name |
    Abstract_declarator |
    Direct_abstract_declarator |
    Typedef_name |
    Statement |
    Labeled_statement |
    Expression_statement |
    Compound_statement |
    Statement_list |
    Selection_statement |
    Iteration_statement |
    Jump_statement |
    Expression |
    Assignment_expression |
    Assignment_operator |
    Conditional_expression |
    Constant_expression |
    Logical_OR_expression |
    Logical_AND_expression |
    Inclusive_OR_expression |
    Exclusive_OR_expression |
    And_expression |
    Equality_expression |
    Relational_expression |
    Shift_expression |
    Additive_expression |
    Multiplicative_expression |
    Cast_expression |
    Unary_expression |
    Unary_operator |
    Postfix_expression |
    Primary_expression |
    Argument_expression_list |
    Constant
    deriving (Show , Eq , Ord , Enum)

translation_unit = NonTerminal Translation_unit
external_declaration = NonTerminal External_declaration
function_definition = NonTerminal Function_definition
declaration = NonTerminal Declaration
declaration_list = NonTerminal Declaration_list
declaration_specifiers = NonTerminal Declaration_specifiers
storage_class_specifier = NonTerminal Storage_class_specifier
type_specifier = NonTerminal Type_specifier
type_qualifier = NonTerminal Type_qualifier
struct_or_union_specifier = NonTerminal Struct_or_union_specifier
struct_or_union = NonTerminal Struct_or_union
struct_declaration_list = NonTerminal Struct_declaration_list
init_declarator_list = NonTerminal Init_declarator_list
init_declarator = NonTerminal Init_declarator
struct_declaration = NonTerminal Struct_declaration
specifier_qualifier_list = NonTerminal Specifier_qualifier_list
struct_declarator_list = NonTerminal Struct_declarator_list
struct_declarator = NonTerminal Struct_declarator
enum_specifier = NonTerminal Enum_specifier
enumerator_list = NonTerminal Enumerator_list
enumerator = NonTerminal Enumerator
declarator = NonTerminal Declarator
direct_declarator = NonTerminal Direct_declarator
pointer = NonTerminal Pointer
type_qualifier_list = NonTerminal Type_qualifier_list
parameter_type_list = NonTerminal Parameter_type_list
parameter_list = NonTerminal Parameter_list
parameter_declaration = NonTerminal Parameter_declaration
identifier_list = NonTerminal Identifier_list
initializer = NonTerminal Initializer
initializer_list = NonTerminal Initializer_list
type_name = NonTerminal Type_name
abstract_declarator = NonTerminal Abstract_declarator
direct_abstract_declarator = NonTerminal Direct_abstract_declarator
typedef_name = NonTerminal Typedef_name
statement = NonTerminal Statement
labeled_statement = NonTerminal Labeled_statement
expression_statement = NonTerminal Expression_statement
compound_statement = NonTerminal Compound_statement
statement_list = NonTerminal Statement_list
selection_statement = NonTerminal Selection_statement
iteration_statement = NonTerminal Iteration_statement
jump_statement = NonTerminal Jump_statement
expression = NonTerminal Expression
assignment_expression = NonTerminal Assignment_expression
assignment_operator = NonTerminal Assignment_operator
conditional_expression = NonTerminal Conditional_expression
constant_expression = NonTerminal Constant_expression
logical_OR_expression = NonTerminal Logical_OR_expression
logical_AND_expression = NonTerminal Logical_AND_expression
inclusive_OR_expression = NonTerminal Inclusive_OR_expression
exclusive_OR_expression = NonTerminal Exclusive_OR_expression
and_expression = NonTerminal And_expression
equality_expression = NonTerminal Equality_expression
relational_expression = NonTerminal Relational_expression
shift_expression = NonTerminal Shift_expression
additive_expression = NonTerminal Additive_expression
multiplicative_expression = NonTerminal Multiplicative_expression
cast_expression = NonTerminal Cast_expression
unary_expression = NonTerminal Unary_expression
unary_operator = NonTerminal Unary_operator
postfix_expression = NonTerminal Postfix_expression
primary_expression = NonTerminal Primary_expression
argument_expression_list = NonTerminal Argument_expression_list
constant = NonTerminal Constant

identifier = t "identifier"
string = t "string"
integer_constant = t "integer_constant"
character_constant = t "character_constant"
floating_constant = t "floating_constant"
enumeration_constant = t "enumeration_constant"

grammer Translation_unit =
    external_declaration ./
    translation_unit .> external_declaration
grammer External_declaration =
    function_definition ./
    declaration
grammer Function_definition = opt declaration_specifiers .> declarator .> opt declaration_list .> compound_statement
grammer Declaration = declaration_specifiers .> opt init_declarator_list .> t "!"
grammer Declaration_list =
    declaration ./
    declaration_list .> declaration
grammer Declaration_specifiers =
  storage_class_specifier .> opt declaration_specifiers ./
  type_specifier .> opt declaration_specifiers ./
  type_qualifier .> opt declaration_specifiers
grammer Storage_class_specifier =
    t "auto" ./ t "register" ./ t "static" ./ t "extern" ./ t "typedef"
grammer Type_specifier =
    t "void" ./ t "char" ./ t "short" ./ t "int" ./ t "long" ./ t "float" ./ t "double" ./ t "signed" ./
    t "unsigned" ./ struct_or_union_specifier ./ enum_specifier ./ typedef_name
grammer Type_qualifier = t "const" ./ t "volatile"
grammer Struct_or_union_specifier =
  struct_or_union .> opt identifier .> t "{" .> struct_declaration_list .> t "}" ./
  struct_or_union .> identifier
grammer Struct_or_union = t "struct" ./ t "union"
grammer Struct_declaration_list = struct_declaration ./ struct_declaration_list .> struct_declaration
grammer Init_declarator_list = init_declarator ./ init_declarator_list .> t "," .> init_declarator
grammer Init_declarator = declarator ./ declarator .> t "=" .> initializer
grammer Struct_declaration = specifier_qualifier_list .> struct_declarator_list .> t ";"
grammer Specifier_qualifier_list =
    type_specifier .> opt specifier_qualifier_list ./
    type_qualifier .> opt specifier_qualifier_list
grammer Struct_declarator_list =
    struct_declarator ./
    struct_declarator_list .> t "," .> struct_declarator
grammer Struct_declarator =
    declarator ./
    opt declarator .> t ":" .> constant_expression
grammer Enum_specifier =
    t "enum" .> opt identifier .> t "{" .> enumerator_list .> t "}" ./
    t "enum" .> identifier
grammer Enumerator_list =
    enumerator ./
    enumerator_list .> t "," .> enumerator
grammer Enumerator =
    identifier ./
    identifier .> t "=" .> constant_expression
grammer Declarator = opt pointer .> direct_declarator
grammer Direct_declarator =
    identifier ./
    t "(" .> declarator .> t ")" ./
    direct_declarator .> t "[" .> opt constant_expression .> t "]" ./
    direct_declarator .> t "(" .> parameter_type_list .> t ")" ./
    direct_declarator .> t "(" .> opt identifier_list .> t ")"
grammer Pointer =
    t "*" .> opt type_qualifier_list ./
    t "*" .> opt type_qualifier_list .> pointer
grammer Type_qualifier_list =
    type_qualifier ./
    type_qualifier_list .> type_qualifier
grammer Parameter_type_list =
    parameter_list ./
    parameter_list .> t "," .> t "..."
grammer Parameter_list =
    parameter_declaration ./
    parameter_list .> t "," .> parameter_declaration
grammer Parameter_declaration =
    declaration_specifiers .> declarator ./
    declaration_specifiers .> opt abstract_declarator
grammer Identifier_list =
    identifier ./
    identifier_list .> t "," .> identifier
grammer Initializer =
    assignment_expression ./
    t "{" .> initializer_list .> t "}" ./
    t "{" .> initializer_list .> t "," .> t "}"
grammer Initializer_list =
    initializer ./
    initializer_list .> t "," .> initializer
grammer Type_name =
    specifier_qualifier_list .> opt abstract_declarator
grammer Abstract_declarator =
    pointer ./
    opt pointer .> direct_abstract_declarator
grammer Direct_abstract_declarator =
    t "(" .> abstract_declarator .> t ")" ./
    opt direct_abstract_declarator .> t "[" .> opt constant_expression .> t "]" ./
    opt direct_abstract_declarator .> t "(" .> opt parameter_type_list .> t ")"
grammer Typedef_name = identifier
grammer Statement =
    labeled_statement ./
    expression_statement ./
    compound_statement ./
    selection_statement ./
    iteration_statement ./
    jump_statement
grammer Labeled_statement =
    identifier .> t ":" .> statement ./
    t "case" .> constant_expression .> t ":" .> statement ./
    t "default" .> t ":" .> statement
grammer Expression_statement = opt expression .> t ";"
grammer Compound_statement = t "{" .> opt declaration_list .> opt statement_list .> t "}"
grammer Statement_list =
    statement ./
    statement_list .> statement
grammer Selection_statement =
    t "if" .> t "(" .> expression .> t ")" .> statement ./
    t "if" .> t "(" .> expression .> t ")" .> statement .> t "else" .> statement ./
    t "switch" .> t "(" .> expression .> t ")" .> statement
grammer Iteration_statement =
    t "while" .> t "(" .> expression .> t ")" .> statement ./
    t "do" .> statement .> t "while" .> t "(" .> expression .> t ")" .> t ";" ./
    t "for" .> t "(" .> opt expression .> t ";" .> opt expression .> t ";" .> opt expression .> t ")" .> statement
grammer Jump_statement =
    t "goto" .> identifier .> t ";" ./
    t "continue" .> t ";" ./
    t "break" .> t ";" ./
    t "return" .> opt expression .> t ";"
grammer Expression =
    assignment_expression ./
    expression .> t "," .> assignment_expression
grammer Assignment_expression =
    conditional_expression ./
    unary_expression .> assignment_operator .> assignment_expression
grammer Assignment_operator =
    t "=" ./ t "*=" ./ t "/=" ./ t "%=" ./ t "+=" ./ t "-=" ./ t "<<=" ./ t ">>=" ./ t "&=" ./ t "^=" ./ t "|= "
grammer Conditional_expression =
    logical_OR_expression ./
    logical_OR_expression .> t "?" .> expression .> t ":" .> conditional_expression
grammer Constant_expression =
    conditional_expression
grammer Logical_OR_expression =
    logical_AND_expression ./
    logical_OR_expression .> t "||" .> logical_AND_expression
grammer Logical_AND_expression =
    inclusive_OR_expression ./
    logical_AND_expression .> t "&&" .> inclusive_OR_expression
grammer Inclusive_OR_expression =
    exclusive_OR_expression ./
    inclusive_OR_expression .> t "|" .> exclusive_OR_expression
grammer Exclusive_OR_expression =
    and_expression ./
    exclusive_OR_expression .> t "^" .> and_expression
grammer And_expression =
    equality_expression ./
    and_expression .> t "&" .> equality_expression
grammer Equality_expression =
    relational_expression ./
    equality_expression .> t "==" .> relational_expression ./
    equality_expression .> t "!=" .> relational_expression
grammer Relational_expression =
    shift_expression ./
    relational_expression .> t "<" .> shift_expression ./
    relational_expression .> t ">" .> shift_expression ./
    relational_expression .> t "<=" .> shift_expression ./
    relational_expression .> t ">=" .> shift_expression
grammer Shift_expression =
    additive_expression ./
    shift_expression .> t "<<" .> additive_expression ./
    shift_expression .> t ">>" .> additive_expression
grammer Additive_expression =
    multiplicative_expression ./
    additive_expression .> t "+" .> multiplicative_expression ./
    additive_expression .> t "-" .> multiplicative_expression
grammer Multiplicative_expression =
    cast_expression ./
    multiplicative_expression .> t "*" .> cast_expression ./
    multiplicative_expression .> t "/" .> cast_expression ./
    multiplicative_expression .> t "%" .> cast_expression
grammer Cast_expression =
    unary_expression ./
    t "(" .> type_name .> t ")" .> cast_expression
grammer Unary_expression =
    postfix_expression ./
    t "++" .> unary_expression ./
    t "--" .> unary_expression ./
    unary_expression .> cast_expression ./
    t "sizeof" .> unary_expression ./
    t "sizeof" .> t "(" .> type_name .> t ")"
grammer Unary_operator =
    t "&" ./ t "*" ./ t "+" ./ t "-" ./ t "~" ./ t "!"
grammer Postfix_expression =
    primary_expression ./
    postfix_expression .> t "[" .> expression .> t "]" ./
    postfix_expression .> t "(" .> opt argument_expression_list .> t ")" ./
    postfix_expression .> t "." .> identifier ./
    postfix_expression .> t "->" .> identifier ./
    postfix_expression .> t "++" ./
    postfix_expression .> t "--"
grammer Primary_expression =
    identifier ./
    constant ./
    string ./
    t "(" .> expression .> t ")"
grammer Argument_expression_list =
    assignment_expression ./
    argument_expression_list .> t "," .> assignment_expression    
grammer Constant =
    integer_constant ./
    character_constant ./
    floating_constant ./
    enumeration_constant
*Main> filter is_direct_left_recursion all_nt 
[Translation_unit,Declaration_list,Struct_declaration_list,Init_declarator_list,Struct_declarator_list,Enumerator_list,Direct_declarator,Type_qualifier_list,Parameter_list,Identifier_list,Initializer_list,Direct_abstract_declarator,Statement_list,Expression,Logical_OR_expression,Logical_AND_expression,Inclusive_OR_expression,Exclusive_OR_expression,And_expression,Equality_expression,Relational_expression,Shift_expression,Additive_expression,Multiplicative_expression,Unary_expression,Postfix_expression,Argument_expression_list]

*Main> filter is_mutually_left_recursion  all_nt 
[]

これそもそも正しく実装出来てるかどうか分かりませんけど、ちゃんと動いてるとすれば、一応直接左再帰除去を27の生成規則に適用すれば、再帰下降パーザでも取りあえずなんとか的な…。