#include <iostream.h>
#include <strstream.h>
#include <ctype.h>
#include <string>
#include <list>
#include <set>
#include <map>
#include <assert.h>

/*
  "gra2reduc" - version 1 (1999) - durr@lri.fr - GNU copyleft applies
  
  reads a grammar and outputs an equivalent reduced grammar.

INPUT:
  the grammar is of the form:

  S0 -> a S1 S5 b
  S3 -> f g h
  S1 -> c S1 S2
  S1 -> f S4
  S1 -> d
  S1 -> i j k
  S2 -> e S2
  S4 -> S2 g h
  S5 -> S6 word
  S6 -> one

  where 
  (1) every line must contain the symbol "->", so if you have
      comments or blank lines in your file you may want to run

            grep " \-> " file.gra | gra2reduc PRODUCTIONS

  (2) our convention: non-terminals are symbols beginning with an "S",
      and "S0" is the start symbol.  (These conventions are easy
      to change in the source.)

      
OUTPUT
  the program will output an equivalent grammar, where 
  (1) every non-terminal can be reached from the start symbol.
  (2) every non-terminal can produce a word containing only terminals.

  You can choose between different formats of output.  
  PRODUCTIONS - is the format we used also as input:

  S0 -> a S1 one word b
  S1 -> d
  S1 -> i j k        

  GENSERIE - will give this:

  S0      =  a * S1 * one * word * b
  S1      =  d
          +  i * j * k

  COMBSTRUCT - will give this:
  S0      = Union( Prod( a, S1, one, word, b)),
  S1      = Union( Prod( d),
                   Prod( i, j, k)),         

enjoy - xtof
*/
  

// ----------- classes

class Grammar;

// ........... symbols

// convention: non terminals begin with 'S'
struct Symbol: public string {
  Symbol() {}
  Symbol(const Symbol &s) :string(s) {}
  Symbol(char *s)         :string(s) {}
  bool isTerminal(void) const {return operator[](0)!='S';}
};

bool operator<(const Symbol &a, const Symbol &b) {
  return strcmp(a.c_str(),b.c_str())<0;
}

struct CmpSymbol {
  bool operator()(const Symbol &a, const Symbol &b) const {return a<b;}
};


// ........... set of symbols (used as set of non terminals)

struct SET: public set<Symbol,CmpSymbol> {
  bool member(const Symbol &s) const {return find(s)!=end();}
  void add(const Symbol &s)          {insert(begin(), s);}
};

// ........... words

struct Word: public list<Symbol> {
  bool isGood(const SET &good) const;
  bool isOneWord(void) const;
  void print(ostream &out, int mode) const;
  void replace(const Grammar &oneWord);
};

bool operator<(const Word   &a, const Word   &b) {
  Word::const_iterator pa=a.begin();
  Word::const_iterator pb=b.begin();
  for (; pb!=b.end(); pb++, pa++) {
    if (pa==a.end() || *pa < *pb)
      return true;
  }
  return false;
}  


// ........... rules

struct Rule: public list<Word> {
  bool isGood(const SET &good) const;
  bool isOneWord(void) const {return size()==1 && (*begin()).isOneWord();}
  void replace(const Grammar &oneWord);
};


// ........... grammar

struct Grammar: public map<Symbol,Rule,CmpSymbol> { 
  void reduce(void);
  bool keep_only(const SET &good);
  bool discard  (const SET &bad );
  void print(ostream &out, int mode) const;
  void replace(const Grammar &oneWord);
};


// ----------- methods

// tests if all symbols are terminals or good non terminals
bool Word::isGood(const SET &good) const {
  for (const_iterator i=begin(); i!=end(); i++)
    if (!( (*i).isTerminal() || good.member(*i) ))
      return false;
  return true;
}

//  word is "one word" if it contains only terminals
bool Word::isOneWord(void) const {
  for (const_iterator i=begin(); i!=end(); i++)
    if (! (*i).isTerminal())
      return false;
  return true;
}

// replaces symbols in a word using the map oneWord
void Word::replace(const Grammar &oneWord) {
  iterator                i, next_i;
  const_iterator   k;
  Grammar::const_iterator j;

  for(i=next_i=begin(); i!=end(); i=next_i) {
    next_i++;
    j = oneWord.find(*i);
    if (j!=oneWord.end()) {
      const Symbol &left  = (*j).first;
      const Rule   &right = (*j).second;
      assert(right.size()==1);
      const Word   &w     = *right.begin();
      for (k=w.begin(); k!=w.end(); k++) 
	insert(i, *k);
      erase(i);
    }
  }
}

// a rule is good if it contains a good word
bool Rule::isGood(const SET &good) const {
  for (const_iterator w=begin(); w!=end(); w++)
    if ((*w).isGood(good))
      return true;
  return false;
}

void Rule::replace(const Grammar &oneWord) {
  iterator i;
  for (i=begin(); i!=end(); i++)
    (*i).replace(oneWord);
}

// reduces the grammar to good non terminals only
// returns true if a non-terminal is bad, 
//              because it produces no good words
bool Grammar::keep_only(const SET &good) {
  iterator       l, next_l;
  Rule::iterator w, next_w;
  bool           retval = false;

  for (next_l=l=begin(); l!=end(); l=next_l) {
    next_l++;
    const Symbol &left  = (*l).first;
          Rule   &right = (*l).second;
    if (!good.member(left))
      erase(l);
    else {
      for (next_w=w=right.begin(); w!=right.end(); ) {
	next_w++;
	if (!(*w).isGood(good))
	  right.erase(w);
	w = next_w;
      }
      if (right.size()==0) {
	erase(l);
	retval = true;
      }
    }
  }
  return retval;
}

void Grammar::replace(const Grammar &oneWord) {
  iterator i;
  for (i=begin(); i!=end(); i++){
    const Symbol &left  = (*i).first;
          Rule   &right = (*i).second;
    right.replace(oneWord);
  }
}

// ----------- input


istream &operator>>(istream &in, Symbol &s) {
  static char buf[1024];
  in >> buf;
  s = Symbol(buf);
  return in;
}

istream &operator>>(istream &in, Word &w) {
  Symbol *s=new Symbol;
  while (in>>*s) 
    w.push_back(*s);
  return in;
}

istream &operator>>(istream &in, Grammar &G) {
  static char buf[1024];
  Symbol left, sep;
  // read lines
  while (in.scan("%[^\n]\n",buf)) {
    istrstream line(buf);
    Word *right=new Word;
    // read rule from beginning
    line >> left >> sep >> *right;
    assert((string)sep=="->");
    G[left].push_back(*right);
  }
  return in;
}


// ----------- output

ostream &operator<<(ostream &out, const Symbol &s) {
  return out << (string)s;
}

void Word::print(ostream &out, int mode) const {
  static const char *sep[3][3] = {
    {"Prod( ", ", ",  ")"},
    {" ",      " * ", ""},
    {" -> ",   " ",   ""}};

  assert((mode>=0)&& (mode<3));
  const char *s = sep[mode][0];
  for (Word::const_iterator i=begin(); i!=end(); i++) {
    out << s << (string)*i;
    s = sep[mode][1];
  }
  out << sep[mode][2];
}

void Grammar::print(ostream &out, int mode) const {
  static const char *sep[3][3] = {
    {"%s\t= Union( ", ",\n\t         ", "),\n"},
    {"%s\t= ",        "\n\t+ ",         "\n"},
    {"%s",            "\n%s",           "\n"}};
    

  assert((mode>=0)&& (mode<3));
  const char *s;
  for (Grammar::const_iterator i=begin(); i!=end(); i++) {
    const Symbol &left  = (*i).first;
    const Rule   &right = (*i).second;
    s = sep[mode][0];
    for (Rule::const_iterator w=right.begin(); w!=right.end(); w++) {
      cout.form(s, left.data());
      (*w).print(out, mode);
      s = sep[mode][1];
    }
    out << sep[mode][2];
  }   
}


// ----------- tests (now unused)


#if 0

main() {
  Word w;
  cin >> w;
  cout << w << " is " << w.isTerminal() << endl;
}


main() {
  Rule r;
  cin >> r;
  cout << r;
}


#endif


// ---------- reduce a grammar

void Grammar::reduce(void) {
  iterator             i, next_i;
  Rule::const_iterator w;
  Word::const_iterator s;
  bool                 modified, new_bad_guys;

  // usefull: set of non terminals which produce at least one
  //          terminal string.
  // find this set = find a fixpoint (until !modified)
  // keep only rules for usefull non terminals

  SET usefull;
  for (modified=true; modified; ) {
    modified = false;
    for (i=begin(); i!=end(); i++) {
      const Symbol &left  = (*i).first;
      const Rule   &right = (*i).second;
      if (!usefull.member(left) && right.isGood(usefull)) {
	usefull.add(left);
	modified=true;
      }  
    }
  }
  keep_only(usefull);
  
  // reachable: set of non terminals which appear in some
  //            production from the starting symbol S0
  // find this set = find a fixpoint (until !modified)
  // keep only rules for reachable non terminals

  // loop on all rules and on all righthand words to find 
  // new reachable non terminals

  do {
    SET reachable;
    reachable.add("S0");
    for (modified=true; modified; ) {
      modified = false;
      for (i=begin(); i!=end(); i++) {
	const Symbol &left  = (*i).first;
	const Rule   &right = (*i).second;
	if (reachable.member(left)) {
	  for (w=right.begin(); w!=right.end(); w++) 
	    for (s=(*w).begin(); s!=(*w).end(); s++)
	      if (!(*s).isTerminal() && !reachable.member(*s)) {
		reachable.add(*s);
		modified = true;
	      }
	}
      }
    }
    new_bad_guys = keep_only(reachable);
   } while( new_bad_guys );
   
   // oneWord: set of non terminals which produce only a single word
   //          for example the empty word
   // find the set oneWord, replace all instances and repeat
   do {
     Grammar oneWord;
     modified = false;
     for (next_i=i=begin(); i!=end(); i=next_i) {
       next_i++;
       const Symbol &left  = (*i).first;
       const Rule   &right = (*i).second;
       if ( right.isOneWord() ) {
         oneWord.insert(*i);
         erase(i);
         modified = true;
         }
     }
     // replace now all instances of oneWord
     if (modified)
       replace(oneWord);
   } while( modified );
}

// ---------- main

  
main(int argc, char *argv[]) {
  int mode=-1;
  if (argc==2) 
    switch (toupper(argv[1][0])) {
    case 'P': mode=2; break;
    case 'C': mode=0; break;
    case 'G': mode=1; break;
    }
  if (mode==-1) {
    cout << "Usage: "<<argv[0]<<" mode"<<endl
	 << "where mode is PRODUCTIONS, COMBSTRUCT or GENSERIE"<<endl;
    exit(1);
  }
  Grammar G;
  cin >> G;
  G.reduce();
  G.print(cout, mode);
}














