# --- coding: utf-8 ---
"""

sed -f clean-edit test.bbl > test.bbl-clean

* remove line breaks
* remove {\_~ special characters from titles, names and journals,
  but NOT from DOI and URL

* specially mark known names, see the list in authorlist.py

* remove START??URL::END??URL when it is empty.
* omit entries marked "submitted"

* select entries by year (the years are given as command-line arguments).
"""

import sys
# Years to select, taken verbatim (as strings) from the command-line arguments.
selectyears = sys.argv[1:]

# Input file; it is read line by line by the main loop of this script.
infile = "cleanbib.bbl"
ein = open(infile,"r")

# Output file; process_entry() and processauthors() write to it.
outfile = "Publikationserfassung-Daten"
aus = open(outfile,"w")


def warning(x):
    """Report a fatal inconsistency in the input and abort.

    The message ``x`` is printed to standard output and then raised as a
    ``RuntimeError``, so this function never returns normally.
    """
    print(x)
    # Abort explicitly rather than through a NameError triggered by
    # evaluating an undefined name.
    raise RuntimeError(x)

def process_entry():
    """Write the entry currently held in ``buf`` if its year was selected.

    The entry is merged with mergelines(); when the resulting year is one
    of ``selectyears``, a separator line carrying the year is written to
    ``aus`` and the merged lines are handed to processauthors().  Entries
    with any other year produce no output.
    """
    year, merged = mergelines()
    if year in selectyears:
        aus.write("=" * 55 + " >>> " + str(year) + "\n")
        processauthors(merged)

#EXTERN = 2 # default value
#DOKTORAND = 3
#STUDENT = 4
#MITARBEITER = -1 # in the database!

from authorlist import fu_authors,EXTERN,DOKTORAND,STUDENT,MITARBEITER



# Every (surname, given name) pair seen in a processed entry; filled by
# processauthors() and listed on standard output at the end of the run.
allauthors = set()

def processauthors(buf):
    """Write one merged bibliography entry to the output file ``aus``.

    ``buf`` is the list of merged lines of a single entry.  The output
    consists of, in this order:

    * a starred box repeating every line of ``buf`` that starts with "***",
    * a numbered list of all authors, each annotated with its status from
      ``fu_authors`` when the author is listed there,
    * numbered STARTNAME/STARTVORNAME/STARTSTATUS records for every author
      whose status is not MITARBEITER (authors missing from ``fu_authors``
      get the status EXTERN),
    * every line of ``buf`` that is not an author field.

    Each (surname, given name) pair is also added to the module-level set
    ``allauthors``.
    """
    name_open, name_close = "STARTNAME:", ":ENDNAME"
    given_open, given_close = "STARTVORNAME:", ":ENDVORNAME"

    def is_surname(entry_line):
        return (entry_line.startswith(name_open)
                and entry_line.endswith(name_close))

    def is_given_name(entry_line):
        return (entry_line.startswith(given_open)
                and entry_line.endswith(given_close))

    # Starred box with the "***" marker lines of the entry.
    stars = "*" * 50
    aus.write(stars + "\n****\n")
    for entry_line in buf:
        if entry_line.startswith("***"):
            aus.write(entry_line + "\n")
    aus.write("****\n" + stars + "\n")

    # Collect the authors: a given-name field completes an author and is
    # paired with the most recently seen surname field.
    authors = []
    for entry_line in buf:
        if is_surname(entry_line):
            surname = entry_line[len(name_open):-len(name_close)]
        if is_given_name(entry_line):
            given = entry_line[len(given_open):-len(given_close)]
            authors.append((surname, given))
            allauthors.add((surname, given))

    if len(authors) == 1:
        aus.write("Autor:\n")
    else:
        aus.write("Autoren:\n")

    # Human-readable list of all authors, numbered from 1.
    for position, (surname, given) in enumerate(authors, 1):
        full = surname + ", " + given
        aus.write("%2d: " % position)
        if full not in fu_authors:
            aus.write(full + "\n")
        elif fu_authors[full] == MITARBEITER:
            aus.write("%-30s    !!! FU-DATENBANK!!!\n" % full)
        else:
            aus.write("%-30s (%d)\n" % (full, fu_authors[full]))
    aus.write("---\n")

    # Numbered records; authors with status MITARBEITER are skipped and do
    # not consume a record number.
    record_no = 1
    for surname, given in authors:
        status = fu_authors.get(surname + ", " + given, EXTERN)
        if status == MITARBEITER:
            continue
        aus.write("STARTNAME%d:%s:ENDNAME%d\n"
                  % (record_no, surname, record_no))
        aus.write("STARTVORNAME%d:%s:ENDVORNAME%d\n"
                  % (record_no, given, record_no))
        aus.write("STARTSTATUS%d:%d:ENDSTATUS%d\n"
                  % (record_no, status, record_no))
        record_no += 1
    aus.write("---\n")

    # Everything else of the entry, with the author fields left out.
    for entry_line in buf:
        if not (is_surname(entry_line) or is_given_name(entry_line)):
            aus.write(entry_line + "\n")

def mergelines():
  """Merge the raw lines of the current entry into one line per field.

  Works on the module-level list ``buf``.  A field opens at a line that
  starts with ``START<tag>:`` and is complete as soon as the accumulated
  text contains ``:END<tag>``; the lines in between are joined into one
  string.  Lines outside of any field are passed through (right-stripped).

  Assumption: START occurs only at the beginning of a line; END... can be
  anywhere.
  Caution: bibtex breaks overly long words with % at the end of the line.

  Returns the tuple ``(year, outbuf)``.  ``outbuf`` is the list of merged
  lines.  ``year`` is whatever was assigned last while scanning the entry:
  None if nothing was found, the content of the JAHR field, -1 for a note
  of exactly "submitted" or "to appear", a string derived from an arXiv
  identifier, or one of ``selectyears`` found in a line outside any field.
  """
  outbuf = []
  bufline = ""       # text of the field currently being assembled
  waitclose = None   # tag of the open field, or None outside of a field
  year = None
  for line in buf:
    line = line.rstrip()
    if line.startswith("START"):
        en = line.find(":")
        if en>0:
           if waitclose != None:
             # A new field starts while the previous one is still open.
             print outbuf,bufline,line
             warning("START"+waitclose+ " not matched by END.")
             # Only reached if warning() returns.
             outbuf.append(bufline)
             bufline = ""
           # The tag is the text between "START" and the first ":".
           waitclose = line[5:en]
    if waitclose != None:
        # Inside a field: append this line to the accumulated text.
        if bufline=="":
           bufline = line
        elif bufline.endswith("%"):
           # Trailing "%": drop it and join without an intervening space.
           bufline = bufline[:-1]+line.strip()
        else:
           bufline += " "+line.strip()
        if bufline.find(":END"+waitclose)>=0:
            # The field is complete; post-process it.
            if waitclose == "JAHR":
              year = bufline[len("STARTJAHR:"):-len(":ENDJAHR")]
            # DOI, URL, ?URL and ??URL fields keep their special characters.
            if not bufline.startswith(("DOI:","URL:","?URL:","??URL:"),5):
              bufline = bufline.translate(notrans, "{}_\\$^") # delete these characters
            if bufline.startswith("DOI:doi:",5):
              bufline = bufline[:9]+bufline[13:] # redundant doi: (fix publimac!)
            # A note of exactly "submitted" or "to appear" sets the year to -1.
            if bufline in ("STARTNOTE:submitted:ENDNOTE",
                           "STARTNOTE:to appear:ENDNOTE"):
              year = -1
            if bufline.startswith("EPRINT:arXiv:",5):
              en = bufline.find(":END"+waitclose)
              arxiv = bufline[len("STARTEPRINT:arXiv:"):en]
              # If the year found so far is not a selected one, use "20"
              # plus the first two characters of the arXiv identifier.
              if year not in selectyears:
                 year = "20"+arxiv[:2]
              # Extra ?URL field pointing to the arXiv abstract page, followed
              # by a marker line (German for "Other electronic publication").
              outbuf.append("START?URL:http://arxiv.org/abs/"+arxiv
                            + ":END?URL")
              outbuf.append("**** Andere elektronische Veröffentlichung")
            # Empty ??URL, EPRINT and NOTE fields are dropped.
            if bufline not in ("START??URL::END??URL",
                               "STARTEPRINT::ENDEPRINT",
                               "STARTNOTE::ENDNOTE"):
              outbuf.append(bufline)
            bufline = ""
            waitclose = None
    else:
        # Outside of any field: pass the line through.
        outbuf.append(line)
        if not year: ## cheat for "unformatted" bibtex entries
          # Use a selected year that occurs in the line; the last match wins.
          for y in selectyears:
             if line.find(y)>=0: year = y
  if waitclose != None:
        # The entry ended while a field was still open.
        print outbuf, bufline, line
        warning("START"+waitclose+ " not matched by END.")
        # Only reached if warning() returns.
        outbuf.append(bufline)
  return year,outbuf

from string import maketrans
# Identity translation table (Python 2).  mergelines() passes it to
# str.translate() so that only the second argument of that call, the
# characters to delete, has an effect.
notrans = maketrans("","")

# Main loop: split the input into entries and process each of them.
# An entry ends at a separator line, i.e. a line of at least 20 "="
# characters and nothing else.  ``buf`` holds the right-stripped lines of
# the entry being collected; mergelines() reads it as a module-level name.
buf = []
try:
    for line in ein:
        is_separator = (len(line) > 20
                        and line == "=" * (len(line) - 1) + "\n")
        if buf and is_separator:
            process_entry()
            buf = []  # swallow the separator line
        else:
            buf.append(line.rstrip())
    if buf:
        # The last entry is not followed by a separator line.
        process_entry()
finally:
    # Nothing is read from or written to the data files after this point;
    # close them so the output is flushed even if processing aborts.
    ein.close()
    aus.close()

allauthors = list(allauthors)
allauthors.sort()

for n,v in allauthors:
    fmt = n+", "+v
    if fmt in fu_authors and fu_authors[fmt]== MITARBEITER:
        print "%-30s" % fmt, "   !!! FU-DATENBANK!!!"
print
for n,v in allauthors:
    fmt = n+", "+v
    if not ( fmt in fu_authors and fu_authors[fmt]== MITARBEITER):
        print "%-30s" % ('"'+fmt+'": ,'), str(fu_authors.get(fmt," "))

