# coding: utf-8
TRACE_DEDUCTIONS=False
TRACE_COMMANDS=False
WITH_TYPES=False
"""
Try to read and process openTheory files
see http://www.gilith.com/research/opentheory/article.html

run the program without arguments to options.

PROTOTYPE! NOT THOROUGHLY TESTED!

python opentheory.py ~/.opentheory/packages/bool-1.37/bool-1.37.art


????
defineConst
    Pop a term t; pop a name n; push a new constant c
    with name n; push the theorem ⊦ c = t 

is NEW constant checked?

http://www.gilith.com/pipermail/opentheory-users/2011-January/000080.html

> Also, it's not clear whether a "new constant" with the same name as a
> previously defined constant is allowed. I presume so.

This is left to the system, which must be careful to disallow

define `c = T`
define `c = F`
prove `T = F`

I believe all of the HOL theorem provers maintain a symbol table of
constants, and will not allow redefinitions with the same name. The
logical kernel of the opentheory tool stores the definition with the
constant, so will happily allow redefinitions with the same name
(because the two "c" constants above will be treated as different
constants).

Cheers,
Joe

AHA. Sind Konstanten Objekte? !!!


http://www.gilith.com/pipermail/opentheory-users/2011-January/000085.html

> So why does constTerm take 2 arguments instead of just 1?
> Is it just to allow you to instantiate type variables that might be in
> the constant on the stack?

Since constants are solely determined by name (and do not contain any
type information), you also have to provide a type argument to turn a
constant into a term.

Cheers,

Joe
====================
"""

# to be done:
# read input file in UTF-8!

stack = []
dictionary = {}
assumptions = []
theorems = []
theorem_number = 0

const_id_number = 0

version = 5 # initial default version

helptext = """python opentheory.py file.art OPTIONS
  OPTIONS:
    --deductions
    --trace-commands
    --show-types
"""

""" Object types. consist of a tag plus ONE additional field.
("oNum", int)
("oName", name) # name is of the form ([...], string))
("oList", [...])
("oTypeOp", (name, number)) 
         # every newly defined typeop gets a distinct number.
         # number can be None, for typeOps given just by name
         # Where is the arity stored? NOWHERE?? !!
("oType", type)
("oConst", (name, definition))
         # definition can be an oTerm or oNum object or None
# NEW PROPOSAL:
   definition = None for const,
   definition = (linenumber,0) for defineConst,
   definition = (linenumber,"abs") and
   definition = (linenumber,"rep") for defineTypeOp,
   definition = (linenumber,i) i>0, for the i-th constant in defineConst,
("oVar", (string,type)) # variables have empty namespace
("oTerm", (expression,type)) # term object have and expression AND A TYPE!!
("oThm", ([hypotheses], conclusion, id)) # id is a unique number

Two constants in an expression are equal,
 a) if they are both generated by defineConst and have
    alpha-equivalent definition terms,
 b) if they are both generated by defineConstList ..
 c) if they are both generated by "const" (and "constTerm") commands
    and have the same name and type,
 d) if they copies of the same "abs" and "rep" constants that were
    generated in a defineTypeOp command

name = ([strings], string)

expressions:
("tLambda", var, term) # var, term, are (expression,type) pairs
("tApply", f, x)       # f, x are (expression,type) pairs
("tConst", name, definition)
("tVar", string)

Every term object is represented by an (expression,type) pair,
and the first component of "expression" is the tag "tXxx".

types
("typeOp", name, number, (t1,...)) ##! TBD
  # use tuple instead of list of type arguments (t1,...).
  # Otherwise, type cannot be an index to a dictionary
  # number==None for external types, otherwise a unique number
  # given when defineTypeOp is introduced
("typeVar", string) # type variables have empty namespace

"""


############## creation of objects ###################
# def oNum(i): return ("oNum", int)
# ("oName", name) # name is of the form ([...], string))
# ("oList", [...])
# ("oTypeOp", (name, number)) 
#          # every newly defined typeop gets a distinct number.
#          # number can be None, for typeOps given just by name
#          # Where is the arity stored? NOWHERE?? !!
# ("oType", type)
# ("oConst", (name, definition))
# ("oVar", (string,type)) # variables have empty namespace
# ("oTerm", (expression,type)) # term object have and expression AND A TYPE!!
# ("oThm", ([hypotheses], conclusion, id)) # id is a unique number
# 
############## creation of terms ###################
def tLambda(var, term):
 # var, term, are (expression,type) pairs
   exvar,typvar =var
   _,typterm=term
   check (exvar[0] in ("tVar","tbVar"), "malformed lambda-term")
   return ("tLambda", var, term), ("typeOp",ARROW,None,(typvar,typterm))
def tApply(f,x):
 # f, x are (expression,type) pairs
   _, typf = f
   _, typx = x
   check (typf[:3]==("typeOp",ARROW,None))
   check (typf[:3]==("typeOp",ARROW,None) and typf[3][0]==typx,
          "types do not match in application term")
   return ("tApply", f, x),typf[3][1]
def tConst(name, definition, typ):
   return ("tConst", name, definition),typ
def tVar(id,typ):
   return ("tVar", id),typ
def tbVar(string,num,typ):
   return ("tbVar", string,num),typ
############## creation of types ###################
def typeOp(name, args, number):
   return ("typeOp", name, number, args)
def typeVar(string): return ("typeVar", string)


##############  ###################

def prettyprint(ob,withtypes=False):
 global types_list
 types_list = []
 tracenumbers=TRACE_DEDUCTIONS
 ot,ov=ob
 if ot=="oNum":
   return "N"+str(ov)
 elif ot=="oName":
   return "S"+printname(ov)
 elif ot=="oList":
   return "["+",".join(prettyprint(el,withtypes) for el in ov)+"]"
 elif ot=="oTypeOp":
   return "O"+printname(ov[0])
 elif ot=="oType":
   return "T"+printtype(ov)
 elif ot=="oConst":
   return "C"+printname(ov[0])
 elif ot=="oVar":
   n,t=ov
   return "V"+printsimplename(n)+"::"+printtype(t)
 elif ot=="oTerm":
   n,t=ov
   if withtypes:
    return "T"+printterm((n,t),True)
   else:
    return "T"+printterm((n,t),False)+"::"+printtype(t)
 elif ot=="oThm":
   prefix = ""
   if tracenumbers:
     prefix = "("+str(ov[2])+") "
   return (prefix+
           ("{"+",".join([prettyprint(o,withtypes) for o in ov[0]])
            +"}" if ov[0] else "")
           +"⊦"+prettyprint(ov[1],withtypes))
 else:
   error("illegal object "+str(ob))

def printname((l,n)):
 l2=[]
 for i in l+(n,):
   aus = ""
   needs_quote= (i=="")
   for c in i:
     if c in '"\\.':
       aus += '\\'
       needs_quote=True
     if ord(c)<=ord(" ") or c in "{},()λ⊦:'":
       needs_quote=True
     aus += c
   if needs_quote:
     aus = '"'+aus+'"'
   l2.append(aus)
 erg = ".".join(l2)
 #  put " around if contains spaces
 return erg

def printsimplename(n): # non-hierarchical name
  return printname(((),n))

def printtype(t):
 if t[0]=="typeOp":
      oper = printname(t[1])
      arguments = t[3]
      if len(arguments)==0:
           return oper
      elif len(arguments)==2 and looklike_operator(oper):
           # infix notation
           return ("("+printtype(arguments[0])+" "+oper+" "
                      +printtype(arguments[1])+")")
      else:
           return "("+oper+" "+" ".join(map(printtype,arguments))+")"
 if t[0]=="typeVar":
      return "'"+printsimplename(t[1])
 else:
      error("unknown type label: "+str(t))
def looklike_operator(o):
  for c in o:
       if "a"<=c<="z" or "A"<=c<="Z" or c==" ": return False
  return True

def printterm((n,typ),type_print=False):
 global types_list # accumulates types of all constants and lambda-variables
# if typ:
#   appendage=":"+printtype(typ)
# else:
#   appendage=""
# print "PT",n
 tag=n[0]
 if tag=="tLambda":
   if type_print:
     types_list.append(("λ",printsimplename(n[1][0][1]),printtype(n[1][1])))
   return "(λ"+printsimplename(n[1][0][1])+". "+printterm(n[2],type_print)+")"
 elif tag=="tApply":
#  if typ:
#   return ("("+printterm(n[1][0],n[1][1])
#           +" "+printterm(n[2][0],n[2][1])+")" + appendage)
#  else:
   return "("+printterm(n[1],type_print)+" "+printterm(n[2],type_print)+")"
 elif tag=="tConst":
   if type_print:
     types_list.append(("C", printname(n[1]), printtype(typ)))
   return "C"+printname(n[1])
 elif tag=="tVar":
   if type_print:
     types_list.append(("V", printsimplename(n[1]), printtype(typ)))
   return "V"+printsimplename(n[1])
 else:
   error("unknown term label in "+str(n))

def dumpstack(short=False,num=3):
  global types_list # accumulates types of all constants and lambda-variables
  print "STACK ["+str(len(stack))+"] (top=last):"
  number = max(len(stack)-num,0) if short else 0
  for l in stack[-num if short else 0 : ]:
    number +=1 
    print str(number)+":",
    try:
      types_list = []
      print " "+prettyprint(l,withtypes=WITH_TYPES)
      show_types()
    except:
      print l
      exit(1)

def dumpdict():
  print "DICTIONARY:"
  for k,l in dictionary.items():
    #print "D",k,l
    print str(k)+": "+prettyprint(l) #,withtypes=WITH_TYPES)

def dumpresult(withtypes=False):
  dumpres("ASSUMPTIONS",assumptions,withtypes)
  dumpres("THEOREMS",theorems,withtypes)

def dumpres(header,thms,withtypes):
  global types_list
  if thms:
    print len(thms), header
    for k,l in enumerate(thms):
      types_list=[]
      print str(k+1)+": "+prettyprint(l,withtypes)
      if withtypes: show_types()

def show_types():
  global types_list
  save = {}
  hot = set() # the type of hot names should always be displayed
  for tag,name,typ in types_list:
      if tag=="λ":
         save[name]=typ
      elif tag=="C" or tag=="V":
         if name in save and save[name]!=typ:
            hot.add(name)
         else:
           save[name]=typ
  save = {}
  for tag,name,typ in types_list:
      if tag=="C" or tag=="V":
         if name not in hot and name in save and save[name]==typ:
            #print "skip",name
            continue
      save[name]=typ
      print "   ",tag,name,":",typ

ARROW = ((), '->') # NOT "→" !
DEBUG = ('oName', ((), 'debug'))
BOOL = ('typeOp', ((), 'bool'), None, ())
EQUALS =  ('tConst', ((), '='), None)

def equals_term(a,b):
   """builds the term "a=b" of type bool. a and b should have the same type"""
   typ = a[1]  
   if typ!=b[1]: print printtype(typ)+"\n"+printtype(b[1])      
   check (typ==b[1],"internal error: equation between different types")
   equalstype_HALF = ('typeOp', ARROW, None, (typ, BOOL))
   equalstype = ('typeOp', ARROW, None, (typ, equalstype_HALF))
   return tApply(tApply((EQUALS,equalstype),a), b)
   return (("tApply",
               (("tApply",(EQUALS,equalstype),a),  equalstype_HALF),
               b
           ), BOOL)
def decompose_equals((t,resulttype)):
  check(resulttype==BOOL,"equation term not of Boolean type")
  ttag,((tag2,(const,equalstype),a), _),b = t
  check(ttag==tag2=="tApply" and const==EQUALS,"term is not an equation")
  check(equalstype[0]=='typeOp' and equalstype[1]==ARROW
        and equalstype[2]==None)
  typ = equalstype[3][0]
  equalstype_HALF = equalstype[3][1]
  check(equalstype_HALF[0]=='typeOp' and equalstype_HALF[1]==ARROW
        and equalstype_HALF[2]==None
        and equalstype_HALF[3][0]==typ and equalstype_HALF[3][1]==BOOL,
        "term is not an equation")
  return a,b,typ

error_recursive = False
def error(mes=""):
    global error_recursive, lastpop, stack
    if not error_recursive:
       error_recursive = True
       print "\nError on line",1+linenum,".",mes
       print "input line:", line
       #dumpdict()
       print "DICTIONARY ["+str(len(dictionary))+"]"
       stack += lastpop
       if lastpop:
        print "stack may have "+str(len(lastpop))+" additional items."
       dumpstack() ##
    else:
         print mes
         print "STACK (top=last):", stack
#    raise RuntimeError("program detected an error")
    exit(1)

#### for debugging ####
def check_type_wellformed(typ):
    if typ[0]=="typeVar":      
      check(len(typ)==2,"malformed type variable")
      return True
    if typ[0]=="typeOp":
      check(len(typ)==4,"malformed type operator")
      return all(check_type_wellformed(t) for t in typ[3])
    else:
     print typ
     error("unknown type tag "+str(typ[0]))

def check_term_wellformed(term):
  expr,typ = term
  check_type_wellformed(typ)
  tag=expr[0]
  if tag=="tLambda":
     check(len(expr)==3,"malformed lambda-abstraction")
     var,vartype = expr[1]
     check(var[0] in ("tbVar","tVar"),"no variable in lambda-abstraction")
     return check_term_wellformed(expr[1]) and check_term_wellformed(expr[2])
  elif tag=="tApply":
     check(len(expr)==3,"malformed application")
     return check_term_wellformed(expr[1]) and check_term_wellformed(expr[2])
  elif tag=="tConst" or tag=="tbVar":
     check(len(expr)==3,"malformed constant term")
     return True
  elif tag=="tVar":
     check(len(expr)==2,"malformed variable term")
     return True
    
def check(cond,mes=""):
    if not cond:
        error("Condition violated. " + mes)

def push(t,v):
  if t=="oTerm": check_term_wellformed(v)
  stack.append((t,v))

def push_new_oThm(hyps,conclusion,previous=None):
    # hyps = list of terms
    # conclusion = a term
    # previous = tuple of integers
    '''the tuple of numbers "previous" refers to
    the previous theorems on which this is
    based, in terms of entries counted from the top of the stack
    before the command was started.'''
    global theorem_number#, line, linenum
    for _,x in hyps: check_term_wellformed(x)
    check_term_wellformed(conclusion[1])
    theorem_number += 1
    stack.append(("oThm",(hyps,conclusion,theorem_number)))
    if TRACE_DEDUCTIONS:
      print "Line "+str(1+linenum)+", command: "+line,
      if previous:
        print "from ("+",".join(str(lastpop[-x][1][2]) for x in previous)+")",
      print
      print prettyprint(stack[-1],withtypes=WITH_TYPES)
      if WITH_TYPES: show_types()


def pop(num=1):
    # pop num elements from the stack
    if TRACE_COMMANDS and num>3:
      print "before command:",
      dumpstack(short=True,num=num)
    global lastpop # for debugging
    try:
        lastpop=stack[-num:]
        if num==1:
          return stack.pop()
        result = ()
        while num:
          result += (stack.pop(),)
          num -= 1
        return result
    except:
        error("stack underflow")

def mkint(line):
    try:
        v = int(line)
        push("oNum",v)
    except:
        print line
        print int
        error("Invalid number")

def mkstring(line):
    namelist = []
    nextname = ""
    i=1
    while (i<len(line)):
        if line[i]=="\\":
            if i==len(line)-1:
                error('String terminates in "\\"')
            i+=1
            nextname = nextname+line[i]
        elif line[i]==".":
            namelist.append(nextname)
            nextname=""
        elif line[i]=='"':
            push("oName",((tuple(namelist),nextname)))
            if i!=len(line)-1:
                error('Excessive materal after terminating "\\"')
            return
        else:
            nextname = nextname+line[i]
        i+=1
    error('String does not terminate in "\\"')
        

    
def substitute_terms(varbls, replacements, terms):
  # each element of varbls is of the form (string,type)
  # NOT of the form (("tVar",string),type)
  # varbls = [(str,typ) for ((_,str),typ) in varbls] #
  frees = map(freevariables,replacements)
  free_dict = dict( zip(varbls,frees) )
  replacement_dict = dict( zip(varbls,replacements) )
  allvariables =  set(varbls).union(v for x in replacements + terms
                                  for v in varbs(x))
  e1 = map(markboundvariables,terms)
  e2 = [mark_free_occurrences2(term, varbls, free_dict)[0] for term in e1]
  e3 = [replace_and_rename2(replacement_dict,
                           term,allvariables) for term in e2]
  return e3

def substitute_types(varbls, replacements, terms):
  # each element of varbls is of the fom (("tVar",string),type)
  replacement_dict = dict( zip(varbls,replacements) )
  allvariables =  set(varbls).union(v for x in terms for v in varbs(x))

  def type_replace(typ): # works on types
    if typ[0]=="typeVar":      
      return replacement_dict.get(typ[1], typ)
    if typ[0]=="typeOp":
      return typeOp(typ[1], tuple(type_replace(t) for t in typ[3]), typ[2])
    else:
     error("unknown type tag "+str(typ[0]))
     
  def renamevars(term):
   """
   all types will now be instantiated.

   a bound variable u must be renamed if
   1) it is used in a place where
      with the modified type it would refer to a different binding than before
   This is determined in a top-down process, and stored in a rename_dictionary

   In a second phase, the renaming is carried out, and
   all tbVar labels are reset to tVar.
   """
   global new_name_counter
   rename_dict = {}
   current_definitions = {}
   new_name_counter = 0

   def substitute_types_and_check_vars_recursive(term):
      expr,typ = term
      tag=expr[0]
      newtype = type_replace(typ)
      if tag=="tLambda":
         var,vartype = expr[1]
         if var[0]!= "tbVar": error ("no variable in lambda-abstraction")
         newvartype = type_replace(vartype)
         newvar = var[1],newvartype
         save = current_definitions.get(newvar)
         current_definitions[newvar] = var[2]
         v2 = (var,newvartype)
         v3 = substitute_types_and_check_vars_recursive(expr[2])
         if save == None:
          del current_definitions[newvar]
         else:
          current_definitions[newvar] = save
         return ("tLambda",v2,v3),newtype
      elif tag=="tApply":
         return tApply(
                  substitute_types_and_check_vars_recursive(expr[1]),
                  substitute_types_and_check_vars_recursive(expr[2]))
         return  ("tApply",
                  substitute_types_and_check_vars_recursive(expr[1]),
                  substitute_types_and_check_vars_recursive(expr[2])), newtype
      elif tag=="tConst":
         return expr,newtype
      elif tag=="tVar":
         varname= expr[1]
         newvar = varname,newtype
         try:
           currentnumber = current_definitions[newvar]
           # free variable is now confused with a bound variable.
           # decide to rename the bound variable:
           if currentnumber not in rename_dict:
             varname = get_new_name(allvariables)
             rename_dict[currentnumber] = varname
         except KeyError:
           pass
         return expr,newtype
      elif tag=="tbVar":
         varname= expr[1]
         newvar = varname,newtype
         truenumber = expr[2]
         currentnumber = current_definitions[newvar]
         if currentnumber!=truenumber:
           # decide to rename at least one variable:
           if (truenumber not in rename_dict and
               currentnumber not in rename_dict):
             varname = get_new_name(allvariables)
             rename_dict[truenumber] = varname
         return tVar(varname,newtype)
      else:
        error("unknown term tag in "+str(expr))

   def rename_vars_recursive(term):
     # works on expressions, leaves types intact.
     # all tbVar labels are reset to tVar.
     expr,typ = term
     tag=expr[0]
     if tag=="tLambda" or tag=="tApply":
       return (tag,rename_vars_recursive(expr[1]),rename_vars_recursive(expr[2])),typ
     elif tag=="tConst" or tag=="tVar":
       return expr,typ
     elif tag=="tbVar":
       name = rename_dict.get(expr[2], expr[1])
       return tVar(name,typ)
     else:
      error("unknown term tag in "+str(expr))

   newterm = substitute_types_and_check_vars_recursive(term)
   return rename_vars_recursive(newterm)

  e1 = map(markboundvariables,terms)
  e2 = map(renamevars,e1)
  return e2

def get_new_name(allvariables):
  global new_name_counter
  while ("x"+str(new_name_counter)) in allvariables:
    new_name_counter +=1
  return "x"+str(new_name_counter)


def replace_and_rename2(replacement_dict, term, allvariables):
  global new_name_counter
  rename_dict = {}
  new_name_counter = 0
  """  rename_dict is indexed by counter. (name is redundant) """

  def replace_and_rename_recursive(term):
     """
     a bound variable u must be renamed if
     1) var occurs in the scope of the binding of u (as indicated by the x tag)
     2) the replacement contains u as a free variable
   
     All term labels are reset to the forms without x and b extension.
     """
     expr,typ = term
     tag=expr[0]
     if tag=="txLambda":
          var,vartype = expr[1]
          if var[0]!= "tbVar": error ("no variable in lambda-abstraction")
          varname = var[1]
          newvar = varname,vartype
          if newvar in expr[3]:
            # decide to rename:
            varname = get_new_name(allvariables)
            rename_dict[var[2]] = varname
          v2 = tVar(varname,vartype)
          v3 = replace_and_rename_recursive(expr[2])
          return tLambda(v2, v3)
          return (("tLambda", v2, v3),typ)
     elif tag=="txApply":
          tt1 = replace_and_rename_recursive(expr[1])
          tt2 = replace_and_rename_recursive(expr[2])
          return tApply(tt1,tt2)
          return (("tApply",tt1,tt2),typ)
     elif tag=="tConst":
          return term
     elif tag=="txVar": # free variable
          v = (expr[1],typ)
          return replacement_dict.get(v,
                                      tVar(expr[1], typ))
     elif tag=="tbVar": # bounded variable
          name = rename_dict.get(expr[2], 
                                 expr[1]) # old name
          return tVar(name,typ)
     else:
      error("unknown term tag in "+str(expr))

  return replace_and_rename_recursive(term)

def alpha_normalize(t):
  return markboundvariables(t,normalize=True)

def markboundvariables(t,normalize=False):
  """replace each bound variable ("tVar",name) with
  ("tbVar",name,num) where num is a unique index which is increased
  for each newly introduced bound variable.
  maintain a dictionary (name,type)->num which
  gives for each bound variable the number currently in effect.
  If normalize==True, eliminate the name and use ("tVar",num)

  works top-down

  returns a (partially?) new copy of the term t. (This is not
  efficient.)
  """
  global bound_counter,current_definitions
  current_definitions = dict()
  bound_counter = 0
  return markboundvariables_recursive(t,normalize)

def markboundvariables_recursive(t,normalize):
  global bound_counter,current_definitions
  expr,typ = t
  tag = expr[0]
  if tag=="tLambda":
       bound_counter += 1
       var,vartype = expr[1]
       if var[0]!= "tVar": error ("no variable in lambda-abstraction")
       newvar = var[1],vartype
       save = current_definitions.get(newvar)
       current_definitions[newvar] = bound_counter
       if normalize:
         v2 = tVar(bound_counter,vartype)
       else:
         v2 = tbVar(var[1],bound_counter,vartype)
       v3 = markboundvariables_recursive(expr[2],normalize)
       if save == None:
         del current_definitions[newvar]
       else:
         current_definitions[newvar] = save
       return tLambda(v2, v3)
       return (("tLambda", v2, v3),typ)
  elif tag=="tApply":
       tt1 = markboundvariables_recursive(expr[1],normalize)
       tt2 = markboundvariables_recursive(expr[2],normalize)
       return tApply(tt1,tt2)
       return (("tApply",tt1,tt2),typ)
  elif tag=="tConst":
       return t
#  elif tag=="txVar": # free variable
#       return t
  elif tag=="tVar":
       v = (expr[1],typ)
       if v in current_definitions:
         if normalize:
           return tVar(current_definitions[v],typ)
         else:
           return tbVar(expr[1],current_definitions[v],typ)
       else:
         return t
  else:
   error("unknown term tag in "+str(expr))



def mark_free_occurrences2(t, vs, free_dict):
  """Mark all subterms by the set of new free variables ws that
  are introduced by replacing all variables v=(string,type) from
  the list vs that occur as a free variable in the term t.

  Each tag "tTag", "tVar", ... (except "tConst" and "tbVar") is replaced
  by "txTag" etc. to distinguish them from the original records
  They get an additional field to mark them with the above-mentioned
  set ws of variables.

  works recursively bottom-up.

  returns (t2,occurs), where t2 is a (partially?) new copy of the
  expression t (this is maybe not efficient), and 
  "occurs" is the set of variables in t2 (==t2[-1] if this is defined).
  """
  expr,typ = t
  expr = t[0]
  tag = expr[0]
  if tag=="tLambda":
       b = expr[1] # the variable (("tVar",string),type)
       if b[0][0]!= "tbVar": error ("no variable in lambda-abstraction")
       # the following is not necessary, since bound occurrences
       # of this variable are marked as "tbVar":
#       try:
#         i=vs.index((b[0][1],b[1])) # bound variable agrees in name and type
#         vs = vs[:i] + vs[i+1:] # create new copy of vs without this var.
#       except ValueError:
#         pass
       inner2,frees = mark_free_occurrences2(expr[2], vs, free_dict)
       return (("txLambda", b, inner2, frees),typ), frees
  elif tag=="tApply":
       tt1,occ1 = mark_free_occurrences2(expr[1],vs, free_dict)
       tt2,occ2 = mark_free_occurrences2(expr[2],vs, free_dict)
       occ = occ1 | occ2
       return (("txApply",tt1,tt2,occ),typ), occ
  elif tag=="tConst" or tag=="tbVar":
       return t,set()
  elif tag=="tVar":
       if (expr[1],typ) in vs: # variable agrees in name and type
         result = free_dict[expr[1],typ]
       else:
         result = set()
       return (("txVar",expr[1],result),typ), result
  else:
    error("unknown term tag "+str(t))

def freevariables(tt, boundvars=set()):
  "result and boundvars are sets of (string,type)"
  t,typ = tt
  tag = t[0]
  if tag=="tLambda":
       (bvtag,bvname),bvtype = t[1]
       check(bvtag=="tVar" or bvtag=="tbVar")
       var = bvname,bvtype
       b = boundvars | set([var])
       return freevariables(t[2],b)
  elif tag=="tApply":
       return freevariables(t[1],boundvars) | freevariables(t[2],boundvars)
  elif tag=="tConst" or tag=="tbVar":
       return set()
  elif tag=="tVar":
       v = (t[1],typ)
       if v in boundvars:
            return set()
       return set([v])
  else:
   error("unknown term tag "+str(t))

def all_typevariables(t):
  """return all type variables used in the term t and its subterms.
  result = a set of names"""
  s0 = typevariables(t[1])
  expr = t[0]
  tag = expr[0]
  if tag=="tLambda":
    return s0 | all_typevariables(expr[1]) # type of result is already in s0
  if tag=="tApply":
    return s0 | all_typevariables(expr[2]) # type of result is already in s0
  elif tag=="tConst" or tag=="tVar":
    return s0
  else:
    error("illegal tag "+str(t))

def typevariables(typ):
  "result is a set of names"
  tag = typ[0]
  if tag=="typeVar":
    return set([typ[1]])
  elif tag=="typeOp":
    return set(n for arg in typ[3] for n in typevariables(arg))
  else:
   error("unknown type tag "+str(typ))

def varbs(tt):
  """return the names of all occurring variables.
  These names are to be avoided when renaming variables"""
  t,typ = tt
  tag = t[0]
  if tag=="tLambda" or tag=="tApply":
       return varbs(t[1]) | varbs(t[2])
  elif tag=="tConst":
       return set()
  elif tag=="tVar":
       varname = t[1]
       return set([varname])
  else:
   error("unknown term tag "+str(t))

def consts(t):
  expr = t[0]
  tag = expr[0]
  if tag=="tLambda":
    return consts(expr[2])
  elif tag=="tApply":
    return consts(expr[1])|consts(expr[2])
  elif tag=="tConst":
    return set([(expr[1],expr[2])])
  elif tag=="tVar":
    return set()
  else:
    error("illegal tag "+str(t))

def typeoperators(t):
  """return all type operators used in the term t and its subterms.
  result = a set of (name,number) or (name,None)"""
  s0 = typeoper_t(t[1])
  expr = t[0]
  tag = expr[0]
  if tag=="tLambda" or tag=="tApply":
    return s0 | typeoperators(expr[1])|typeoperators(expr[2])
  elif tag=="tConst" or tag=="tVar":
    return s0
  else:
    error("illegal tag "+str(t))
  
def typeoper_t(typ):
  # set of (name,number)
    if typ[0]=="typeVar":      
      return set()
    if typ[0]=="typeOp":
      return set([(typ[1],typ[2])]) | set(u for
           subtyp in typ[3] for u in typeoper_t(subtyp))
    else:
     error("unknown type tag "+str(typ[0]))



#############################################################################


import sys
if len(sys.argv)<2:
  print helptext
  exit()
infile = sys.argv[1]
for option in sys.argv[2:]:
  if option == "--trace-commands":
    TRACE_COMMANDS = True
  elif option == "--deductions":
    TRACE_DEDUCTIONS = True
  elif option == "--show-types":
    WITH_TYPES = True
  else:
    print "Unknown option",option
    print helptext
    exit()
  
for linenum,line in enumerate(open(infile)):
    lastpop = ()
    if line[-1]=="\n":
        line = line[:-1]
    # remove further whitespace?
    if line=="" or line[0]=="#":
      if TRACE_COMMANDS:
         print "Line",1+linenum,"Comment:",line
      continue # comment
    if TRACE_COMMANDS:
      dumpstack(short=True)
      print "Line",1+linenum,"Command:",line
    if line[0]=='"':
        mkstring(line)
    elif line[0] in "-0123456789":
        mkint(line)
    else:
      #line = line # remove further whitespace?
      if line=="absTerm":
         ((bt,bv),(vt,vv))=pop(2)
         check (bt=="oTerm" and vt=="oVar")
         varname,vartype = vv
         newtype = ('typeOp', ARROW, None, (vartype, bv[1]))
         push("oTerm", tLambda(tVar(varname,vartype), bv))
         #push("oTerm", (("tLambda", tVar(varname,vartype), bv),newtype ))
      elif line=="absThm":
         ((tht,thv),(vt,vv))=pop(2)
         check (tht=="oThm" and vt=="oVar")
         varname,vartype = vv
         hyps,(_,concl),_ = thv
         a,b,t = decompose_equals(concl)
         "Constraint: The variable v must not be free in hyps."
         check(all(vv not in h for h in hyps),"var is free in hypotheses.")
         bvar=tVar(varname,vartype)
         push_new_oThm(hyps,
                       ("oTerm", equals_term(
                        tLambda(bvar, a),
                        tLambda(bvar, b))),(1,))

      elif line=="appTerm":
         """Constraint: The term f must have a type of the form σ → τ,
         and the term x must have type σ. The result has then type τ"""        
         (t1t,t1v),(t2t,t2v)=pop(2)
         check(t1t==t2t=="oTerm")
         t1 = t1v[1]
         t2 = t2v[1] # types
         check (len(t2)==4 and t2[0]=="typeOp"
                and t2[1]==ARROW and t2[2]==None and len(t2[3])==2)
         check ( t2[3][0] == t1, "types do not match")
         # type of t2:  t2[3][0] => t2[3][1]
         push("oTerm",tApply(t2v,t1v))
#         push("oTerm",(("tApply", t2v,t1v),t2[3][1]))
      elif line=="appThm":
         (t1t,t1v),(t2t,t2v)=pop(2)
         check(t1t==t2t=="oThm")
         hyp1,(t1t,tt1),_ = t1v
         hyp2,(t2t,tt2),_ = t2v
         check(t1t==t2t=="oTerm")
         a1,b1,t1 = decompose_equals(tt1)
         a2,b2,t2 = decompose_equals(tt2)
         check (t2[:3]==("typeOp",ARROW,None) and len(t2[3])==2,
                "function not of type A->B")
         check (t2[3][0] == t1, "types do not match")
         left =tApply(a2,a1)
         right=tApply(b2,b1)
         push_new_oThm(hyp1+[h for h in hyp2 if h not in hyp1],
                     ("oTerm", equals_term(left,right)),(1,2))

      elif line=="assume":
         (tt,tv) = pop()
         check(tt=="oTerm" and tv[1]==BOOL)
         push_new_oThm ([(tt,tv)],(tt,tv))
      elif line=="axiom":
         (tt,tv),(lat,lav) = pop(2)
         check(tt=="oTerm" and tv[1]==BOOL and lat=="oList")
         check(all(at=="oTerm" and av[1]==BOOL for at,av in lav))
         push_new_oThm (lav,(tt,tv))
         assumptions.append(stack[-1])
      elif line=="betaConv":
         tt,tv = pop()
         check(tt=="oTerm")
         texpr,typ=tv
         tlab,fun,replacement = texpr
         check(tlab=="tApply")
         fexpr,ftype = fun
         flab,((_,vname),vtype),term = fexpr
         check(flab=="tLambda")
         newterm = substitute_terms([(vname,vtype)], [replacement], [term])[0]
         push_new_oThm ([],("oTerm", equals_term(tv,newterm)))
      elif line=="cons":
         (lt,lv),o=pop(2)
         check(lt=="oList")
         push("oList",[o]+lv)
      elif line=="const":
         nt,nv = pop()
         check(nt=="oName")
         push("oConst",(nv,None))
      elif line=="constTerm":
         (tt,tv),(ct,cv)=pop(2)
         check(tt=="oType" and ct=="oConst")
         push("oTerm", tConst(cv[0], cv[1], tv))
      elif line=="deductAntisym":
         (t1t,t1v),(t2t,t2v)=pop(2)
         check(t1t==t2t=="oThm")
         hyp1,(t1t,tt1),_ = t1v
         hyp2,(t2t,tt2),_ = t2v
         check(t1t==t2t=="oTerm")
         try:
           hyp1.remove((t2t,tt2))
         except:
           pass
         try:
           hyp2.remove((t1t,tt1))
         except:
           pass
         push_new_oThm(hyp2+
                      [h for h in hyp1 if h not in hyp2],
                      ("oTerm", equals_term(tt2,tt1)),(1,2))
      elif line=="def":
         (nt,nv) =pop()
         check(nt=="oNum")
         if not stack:
             error("stack underflow")
         dictionary[nv]=stack[-1]
      elif line=="defineConst":
         (tt,tv),(nt,nv)=pop(2)
         check(tt=="oTerm" and nt=="oName")
         definition = alpha_normalize(tv)
       # definition=(linenum,0) !!
         push("oConst",(nv,definition))
         push_new_oThm([],("oTerm", equals_term(
            tConst(nv,definition,tv[1]), tv)))
      elif line=="defineConstList": # [new in version 6]
         (thmt,thmv),(lt,lv) = pop(2)
         check (thmt=="oThm" and lt=="oList")
         hyps,(_,phi),thmnum = thmv
         k = len(hyps)
         check(len(lv)==k,"list length does not match hypotheses")
         check(all(et=="oList" and len(el)==2 for et,el in lv),
               "not a list of pairs")
         elts = [el for _,el in lv]
         check(all(el[0][0]=="oName" and el[1][0]=="oVar" for el in elts))
         varbls = [el[1][1] for el in elts]
         replacements = [tConst(el[0][1],(linenum,num+1),el[1][1][1])
                         for num,el in enumerate(elts)]
         constlist = [("oConst",(el[0][1],(linenum,num+1)))
                         for num,el in enumerate(elts)]
         hyp_vars = set()
         for _,t in hyps:
            var,term,typ =  decompose_equals(t)
            check (var[0][0]=="tVar","LHS not a variable")
            check (len(freevariables(term))==0, "RHS contains free variables")
            check (all_typevariables(term).issubset(typevariables(term[1])),
                   "type variables in subterms only.")
            varname_type = var[0][1],var[1]
            check(varname_type not in hyp_vars,"variables not distinct")
            hyp_vars.add(varname_type)
         check(freevariables(phi).issubset(hyp_vars),
               "phi contains other free variables.")
         push("oList",constlist)
         push_new_oThm([], ("oTerm",
             substitute_terms(varbls, replacements, [phi])[0]), (1,))
      elif line=="defineTypeOp": #[changed in version 6]
         (tht,thv),(lt,lv),(rept,repv),(abst,absv),(nt,nv)=pop(5)
         check(tht=="oThm" and lt=="oList" and
               abst==rept==nt=="oName"
               and all(xt=="oName" for xt,_ in lv))
         hyp,(_,((exprtag,phi,t_term),typ)),_ = thv
         check(hyp==[],"hypotheses set not empty.")
         check(exprtag=="tApply")
         check(len(freevariables(phi))==0,"phi contains free variables.")
         tau = t_term[1] # type tau
         check(all(namespace==() for __,(namespace,xv) in lv),
               "namespace not empty")
         namelist = tuple(xv for __,(namespace,xv) in lv)
         check(len(namelist)==len(set(namelist)),"duplicate names")
         check(set(namelist)==all_typevariables(phi),
               "type variable list incorrect")
         typeopnumber = const_id_number+1
         absnumber = ("oNum",const_id_number+2)
         repnumber = ("oNum",const_id_number+3)
#         absnumber = (linenum, "abs")
#         repnumber = (linenum, "rep")
         newtype = typeOp(nv,tuple(map(typeVar,namelist)),typeopnumber)
         push("oTypeOp", (nv,typeopnumber))
         # Where is the arity stored? NOWHERE?? !!
         const_id_number += 3
         push("oConst", (absv, absnumber))
         push("oConst", (repv, repnumber))
         abstyp = ('typeOp', ARROW, None, (tau, newtype))
         reptyp = ('typeOp', ARROW, None, (newtype, tau))
         var_a = (('tVar',"a"),newtype)
         var_r = (('tVar',"r"),tau)
         thm1_left = tApply(
              tConst(absv, absnumber, abstyp),
              tApply(tConst(repv, repnumber, reptyp),
                     var_a))
         thm1_right = var_a
         thm2_left = tApply(phi, var_r) # left for version 5, right for v.6
         thm2_right = equals_term(
           tApply(tConst(repv, repnumber, reptyp),
                  tApply(tConst(absv, absnumber, abstyp),
                         var_r)),
           var_r)
         if version>5:
           thm1_left = tLambda(var_a, thm1_left)
           thm1_right= tLambda(var_a, thm1_right)
           # SWAP SIDES  between v5 and v6!
           thm2_left,thm2_right = (tLambda(var_r, thm2_right),
                                   tLambda(var_r, thm2_left))
         thm1 = equals_term(thm1_left,thm1_right)
         thm2 = equals_term(thm2_left,thm2_right)
         push_new_oThm([],("oTerm",thm1),(1,))
         push_new_oThm([],("oTerm",thm2),(1,))
         if TRACE_DEDUCTIONS:
            print "New type operator",printname(nv),"with",len(namelist),"argument(s)."       
      elif line=="eqMp":
         (t1t,t1v),(t2t,t2v)=pop(2)
         check(t1t==t2t=="oThm")
         hyp1,(t1t,tt1),_ = t1v
         hyp2,(t2t,tt2),_ = t2v
         check(t1t==t2t=="oTerm")
         a2,b2,t2 = decompose_equals(tt2)
         check(tt1[1]==t2, "type mismatch")
         check(alpha_normalize(tt1)==alpha_normalize(a2),
               "not α-equivalent")
         push_new_oThm(hyp1+[h for h in hyp2 if h not in hyp1],
                      ("oTerm", b2),(1,2)) 
      elif line=="hdTl": # [new in version 6]
         lt,lv = pop()
         check(lt=="oList" and len(lv)>0)
         push(lv[0][0],lv[0][1])
         push("oList",lv[1:])
      elif line=="nil":
         push("oList",[])
      elif line=="opType":
         (lt,lv),(ot,ov)=pop(2)
         check(lt=="oList" and ot=="oTypeOp" 
             and all(xt=="oType" for xt,xv in lv))
         push("oType", typeOp(ov[0],tuple([xv for xt,xv in lv]),ov[1]))
      elif line=="pop":
         pop()
      elif line=="pragma": # [new in version 6]
         n=pop()
         print "PRAGMA DEBUG. input line number",1+linenum
         if n==DEBUG:
            dumpstack()
            dumpdict()
            print "DICTIONARY ["+str(len(dictionary))+"]"
            dumpresult(withtypes=WITH_TYPES)
         elif n==('oName', ((), 'topstack')):
            dumpstack(short=True)
         elif n==('oName', ((), 'show-types')):
            WITH_TYPES=True
         elif n==('oName', ((), '-show-types')):
            WITH_TYPES=False
         else:
            print "Warning: unknown pragma",n[1]
         print
      elif line=="proveHyp":# [new in version 6]
         (t1t,t1v),(t2t,t2v)=pop(2)
         check(t1t==t2t=="oThm")
         hyp1,t1t,_ = t1v
         hyp2,t2t,_ = t2v
         t2no = alpha_normalize(t2t[1])
         # check(t1t==t2t=="oTerm") # by construction
         push_new_oThm(hyp2+[h for h in hyp1
                        if h not in hyp2
                             and alpha_normalize(h[1]) != t2no], t1t,(1,2))
      elif line=="ref":
         nt,nv=pop()
         check(nt=="oNum")
         try:
             t,v=dictionary[nv]
             push(t,v)
         except:
             error("undefined dictionary entry "+str(nv))
      elif line=="refl":
         tt,tv = pop()
         check(tt=="oTerm")
         push_new_oThm([],("oTerm", equals_term(tv,tv)))
      elif line=="remove":
         nt,nv=pop()
         check(nt=="oNum")
         if nv not in dictionary:
             error("undefined dictionary entry "+str(nv))
         t,v=dictionary[nv]
         push(t,v)
         del dictionary[nv]
      elif line=="subst":
         """
   Pop a theorem Γ ⊦ φ; pop a substitution σ; push the theorem Γ[σ] ⊦ φ[σ]
   After: Thm ((θ[ty1/α1, ..., tym/αm])[t1/v1, ..., tn/vn])
   Note: The type variables are instantiated first,
         followed by the term variables.
   Note: Bound variables will be renamed if necessary to prevent
         distinct variables becoming identical after the instantiation.
   NOTE (added): Two distinct free variables can become identical.
         This cannot be prevented.
   INTERPRETATION. the types of the term substitution must be the
         correct terms AFTER the type instantiations.
         """
         (tt,tv),(st,sv) = pop(2)
         check(tt=="oThm" and st=="oList" and len(sv)==2)
         thm_hyps,thm_conclusion,_ = tv
         thmterms = thm_hyps+[thm_conclusion]
         thmterms = [term for _,term in thmterms]
         s1t,typesub = sv[0]
         s2t,varsub = sv[1]
         check(s1t==s2t=="oList")
         check(all(ut=="oList" and len(uv)==2
                   and uv[0][0]=="oName"
                   and uv[0][1][0]==()
                         # type variable names αi in global namespace
                   and uv[1][0]=="oType" for ut,uv in typesub))
         check(all(ut=="oList" and len(uv)==2
                   and uv[0][0]=="oVar"
                   and uv[1][0]=="oTerm"
                   and uv[0][1][1] == uv[1][1][1]
                        # equal types (not mentioned in spec)
                      for ut,uv in varsub))
         if typesub:
           variables    = [uvar for _,uv in typesub for (_,(_,uvar)) in [uv[0]]]
           replacements = [uvar for _,uv in typesub for (_,uvar) in [uv[1]]]
           check (len(variables)==len(set(variables)))
                # variables must be distinct.
           thmterms = substitute_types(variables, replacements, thmterms)
         if varsub:
           variables    = [uvar for _,uv in varsub for (_,uvar) in [uv[0]]]
           replacements = [uvar for _,uv in varsub for (_,uvar) in [uv[1]]]
           check (len(variables)==len(set(variables)))
                # variables must be distinct.
           thmterms = substitute_terms(variables, replacements, thmterms)
         thmterms = [("oTerm",term) for term in thmterms]
         push_new_oThm(thmterms[:-1],thmterms[-1],(1,))
          
      elif line=="sym": # [new in version 6]
         (tt,tv)= pop()
         check(tt=="oThm")
         hyps,conclusion,_ = tv
         a,b,_ = decompose_equals(conclusion[1])
         push_new_oThm(hyps,("oTerm", equals_term(b,a)),(1,))
      elif line=="thm":
         (tt,tv),(lt,lv),(tht,thv) = pop(3)
         check(tt=="oTerm" and lt=="oList" and tht=="oThm")
         check(all(t=="oTerm" for t,_ in lv))
         hyps,conclusion,number = thv
         check(alpha_normalize(tv)==alpha_normalize(conclusion[1]),
               "not α-equivalent")
         normalized = [alpha_normalize(t) for _,t in lv]
         check(len(set(normalized))==len(normalized),
               "assumptions not α-distinct")
#         for h in hyps: print "H",prettyprint(h)
#         for h in normalized: print "N",printterm((h,0))
         check(all(alpha_normalize(h) in normalized for _,h in hyps),
               "assumption not in Thm")
         theorems.append(("oThm", (lv, ("oTerm", tv), number)))

      elif line=="trans": # [new in version 6]
         (tt1,tv1),(tt2,tv2)= pop(2)
         check(tt1==tt2=="oThm")
         hyps1,conc1,_ = tv1
         hyps2,conc2,_ = tv2
         bb,c,_ = decompose_equals(conc1[1])
         a, b,_ = decompose_equals(conc2[1])
         check(alpha_normalize(b)==alpha_normalize(bb),"not equal")
         push_new_oThm(hyps2+[h for h in hyps1 if h not in hyps2],
                       ("oTerm", equals_term(a,c)),(1,2))
      elif line=="typeOp":
         nt,nv=pop()
         check(nt=="oName")
         push("oTypeOp",(nv,None))
      elif line=="var":
         (tt,tv),(vt,vv)=pop(2)
         check(tt=="oType" and vt=="oName" and vv[0]==())
         push("oVar", (vv[1],tv))
      elif line=="varTerm":
         vt,vv=pop()
         check(vt=="oVar")
         vname,vtype=vv
         push("oTerm", tVar(vname, vtype))
      elif line=="varType":
         (nt,nv)=pop()
         check(nt=="oName" and nv[0]==())
         push("oType", typeVar(nv[1]))
         """Constraint: The name n must be in the global
         namespace (i.e., be of the form ([],s)).

         Note: The OpenTheory standard theory library adopts the HOL
         Light convention of naming type variables with capital
         letters, and theorem provers are expected to map them to
         native conventions as part of processing articles. For
         example, in HOL4 it would be natural to map an OpenTheory
         type variable with name A to a native HOL4 type variable with
         name 'a.    """
         
      elif line=="version": # [new in version 6]:

         """Constraint: The version command may only occur
         as the very first command in an article file.
         IGNORED so far. version switch is allowed any time.
         version 6 commands are allowed any time.

         "defineTypeOp" is the only command which is CHANGED in version 6.
         """
         nt,nv=pop()
         check(nt=="oNum" and nv in [5,6])
         version = nv
      else:
        error("Invalid command")

print "\nFile",infile, "processed successfully,",linenum, "lines read."

linenum,line = -1,"END-OF-FILE"

if stack:
 print "WARNING:", len(stack), "object(s) left on the stack"
if dictionary:
 print "WARNING:", len(dictionary), "object(s) left in the dictionary"

constants = set(p for _,(hyp,conc,_) in assumptions+theorems
             for s in hyp+[conc] for p in consts(s[1]))
typeops = set(p for _,(hyp,conc,_) in assumptions+theorems
             for s in hyp+[conc] for p in typeoperators(s[1]))

external_typeops = [u for (u,v) in typeops if v==None]
internal_typeops = [(u,v) for (u,v) in typeops if v!=None]
if external_typeops:
  print (len(external_typeops)),"external type operator(s):",
  print "  "+" ".join(printname(u) for u in external_typeops)
if internal_typeops:
  print (len(internal_typeops)),"internal type operator(s) (defineTypeOp):",
  print "  "+" ".join(printname(u) for u,v in internal_typeops)

external_constants = [u for (u,v) in constants if v==None]
#!! update
internal_constants = [(u,v) for (u,v) in constants if v!=None and v[0]!="oNum"]
implicit_constants = [(u,v) for (u,v) in constants if v!=None and v[0]=="oNum"]
if external_constants:
  print (len(external_constants)),"external constant(s):",
  print "  "+" ".join(printname(u) for u in external_constants)
if internal_constants:
  print (len(internal_constants)),"internal constant(s) (defineConst):",
  print "  "+" ".join(printname(u) for u,v in internal_constants)
if implicit_constants:
  print (len(implicit_constants)),"implicit constant(s) (defineTypeOp):",
  print "  "+" ".join(printname(u) for u,v in implicit_constants)

dumpresult(withtypes=WITH_TYPES)

if (len(external_typeops)
      +len(internal_typeops) != 
    len(set(external_typeops) | 
        set(u for u,v in internal_typeops))):
    print("ERROR: Name clash: different type operators with the same name.")
    exit(1)
if (len(external_constants)
      +len(internal_constants)
      +len(implicit_constants) != 
    len(set(external_constants) | 
        set(u for u,v in internal_constants+implicit_constants))):
    print("ERROR: Name clash: different constants with the same name.")
    exit(1)

