#include #include #include #include #include #include #include #include /* "gra2reduc" - version 1 (1999) - durr@lri.fr - GNU copyleft applies reads a grammar and outputs an equivalent reduced grammar. INPUT: the grammar is of the form: S0 -> a S1 S5 b S3 -> f g h S1 -> c S1 S2 S1 -> f S4 S1 -> d S1 -> i j k S2 -> e S2 S4 -> S2 g h S5 -> S6 word S6 -> one where (1) every line must contain the symbol "->", so if you have comments or blank lines in your file you may want to run grep " \-> " file.gra | gra2reduc PRODUCTIONS (2) our convention: non-terminals are symbols beginning with an "S", and "S0" is the start symbol. (These conventions are easy to change in the source.) OUTPUT the program will output an equivalent grammar, where (1) every non-terminal can be reached from the start symbol. (2) every non-terminal can produce a word containing only terminals. You can choose between different formats of output. PRODUCTIONS - is the format we used also as input: S0 -> a S1 one word b S1 -> d S1 -> i j k GENSERIE - will give this: S0 = a * S1 * one * word * b S1 = d + i * j * k COMBSTRUCT - will give this: S0 = Union( Prod( a, S1, one, word, b)), S1 = Union( Prod( d), Prod( i, j, k)), enjoy - xtof */ // ----------- classes class Grammar; // ........... symbols // convention: non terminals begin with 'S' struct Symbol: public string { Symbol() {} Symbol(const Symbol &s) :string(s) {} Symbol(char *s) :string(s) {} bool isTerminal(void) const {return operator[](0)!='S';} }; bool operator<(const Symbol &a, const Symbol &b) { return strcmp(a.c_str(),b.c_str())<0; } struct CmpSymbol { bool operator()(const Symbol &a, const Symbol &b) const {return a { bool member(const Symbol &s) const {return find(s)!=end();} void add(const Symbol &s) {insert(begin(), s);} }; // ........... words struct Word: public list { bool isGood(const SET &good) const; bool isOneWord(void) const; void print(ostream &out, int mode) const; void replace(const Grammar &oneWord); }; bool operator<(const Word &a, const Word &b) { Word::const_iterator pa=a.begin(); Word::const_iterator pb=b.begin(); for (; pb!=b.end(); pb++, pa++) { if (pa==a.end() || *pa < *pb) return true; } return false; } // ........... rules struct Rule: public list { bool isGood(const SET &good) const; bool isOneWord(void) const {return size()==1 && (*begin()).isOneWord();} void replace(const Grammar &oneWord); }; // ........... grammar struct Grammar: public map { void reduce(void); bool keep_only(const SET &good); bool discard (const SET &bad ); void print(ostream &out, int mode) const; void replace(const Grammar &oneWord); }; // ----------- methods // tests if all symbols are terminals or good non terminals bool Word::isGood(const SET &good) const { for (const_iterator i=begin(); i!=end(); i++) if (!( (*i).isTerminal() || good.member(*i) )) return false; return true; } // word is "one word" if it contains only terminals bool Word::isOneWord(void) const { for (const_iterator i=begin(); i!=end(); i++) if (! (*i).isTerminal()) return false; return true; } // replaces symbols in a word using the map oneWord void Word::replace(const Grammar &oneWord) { iterator i, next_i; const_iterator k; Grammar::const_iterator j; for(i=next_i=begin(); i!=end(); i=next_i) { next_i++; j = oneWord.find(*i); if (j!=oneWord.end()) { const Symbol &left = (*j).first; const Rule &right = (*j).second; assert(right.size()==1); const Word &w = *right.begin(); for (k=w.begin(); k!=w.end(); k++) insert(i, *k); erase(i); } } } // a rule is good if it contains a good word bool Rule::isGood(const SET &good) const { for (const_iterator w=begin(); w!=end(); w++) if ((*w).isGood(good)) return true; return false; } void Rule::replace(const Grammar &oneWord) { iterator i; for (i=begin(); i!=end(); i++) (*i).replace(oneWord); } // reduces the grammar to good non terminals only // returns true if a non-terminal is bad, // because it produces no good words bool Grammar::keep_only(const SET &good) { iterator l, next_l; Rule::iterator w, next_w; bool retval = false; for (next_l=l=begin(); l!=end(); l=next_l) { next_l++; const Symbol &left = (*l).first; Rule &right = (*l).second; if (!good.member(left)) erase(l); else { for (next_w=w=right.begin(); w!=right.end(); ) { next_w++; if (!(*w).isGood(good)) right.erase(w); w = next_w; } if (right.size()==0) { erase(l); retval = true; } } } return retval; } void Grammar::replace(const Grammar &oneWord) { iterator i; for (i=begin(); i!=end(); i++){ const Symbol &left = (*i).first; Rule &right = (*i).second; right.replace(oneWord); } } // ----------- input istream &operator>>(istream &in, Symbol &s) { static char buf[1024]; in >> buf; s = Symbol(buf); return in; } istream &operator>>(istream &in, Word &w) { Symbol *s=new Symbol; while (in>>*s) w.push_back(*s); return in; } istream &operator>>(istream &in, Grammar &G) { static char buf[1024]; Symbol left, sep; // read lines while (in.scan("%[^\n]\n",buf)) { istrstream line(buf); Word *right=new Word; // read rule from beginning line >> left >> sep >> *right; assert((string)sep=="->"); G[left].push_back(*right); } return in; } // ----------- output ostream &operator<<(ostream &out, const Symbol &s) { return out << (string)s; } void Word::print(ostream &out, int mode) const { static const char *sep[3][3] = { {"Prod( ", ", ", ")"}, {" ", " * ", ""}, {" -> ", " ", ""}}; assert((mode>=0)&& (mode<3)); const char *s = sep[mode][0]; for (Word::const_iterator i=begin(); i!=end(); i++) { out << s << (string)*i; s = sep[mode][1]; } out << sep[mode][2]; } void Grammar::print(ostream &out, int mode) const { static const char *sep[3][3] = { {"%s\t= Union( ", ",\n\t ", "),\n"}, {"%s\t= ", "\n\t+ ", "\n"}, {"%s", "\n%s", "\n"}}; assert((mode>=0)&& (mode<3)); const char *s; for (Grammar::const_iterator i=begin(); i!=end(); i++) { const Symbol &left = (*i).first; const Rule &right = (*i).second; s = sep[mode][0]; for (Rule::const_iterator w=right.begin(); w!=right.end(); w++) { cout.form(s, left.data()); (*w).print(out, mode); s = sep[mode][1]; } out << sep[mode][2]; } } // ----------- tests (now unused) #if 0 main() { Word w; cin >> w; cout << w << " is " << w.isTerminal() << endl; } main() { Rule r; cin >> r; cout << r; } #endif // ---------- reduce a grammar void Grammar::reduce(void) { iterator i, next_i; Rule::const_iterator w; Word::const_iterator s; bool modified, new_bad_guys; // usefull: set of non terminals which produce at least one // terminal string. // find this set = find a fixpoint (until !modified) // keep only rules for usefull non terminals SET usefull; for (modified=true; modified; ) { modified = false; for (i=begin(); i!=end(); i++) { const Symbol &left = (*i).first; const Rule &right = (*i).second; if (!usefull.member(left) && right.isGood(usefull)) { usefull.add(left); modified=true; } } } keep_only(usefull); // reachable: set of non terminals which appear in some // production from the starting symbol S0 // find this set = find a fixpoint (until !modified) // keep only rules for reachable non terminals // loop on all rules and on all righthand words to find // new reachable non terminals do { SET reachable; reachable.add("S0"); for (modified=true; modified; ) { modified = false; for (i=begin(); i!=end(); i++) { const Symbol &left = (*i).first; const Rule &right = (*i).second; if (reachable.member(left)) { for (w=right.begin(); w!=right.end(); w++) for (s=(*w).begin(); s!=(*w).end(); s++) if (!(*s).isTerminal() && !reachable.member(*s)) { reachable.add(*s); modified = true; } } } } new_bad_guys = keep_only(reachable); } while( new_bad_guys ); // oneWord: set of non terminals which produce only a single word // for example the empty word // find the set oneWord, replace all instances and repeat do { Grammar oneWord; modified = false; for (next_i=i=begin(); i!=end(); i=next_i) { next_i++; const Symbol &left = (*i).first; const Rule &right = (*i).second; if ( right.isOneWord() ) { oneWord.insert(*i); erase(i); modified = true; } } // replace now all instances of oneWord if (modified) replace(oneWord); } while( modified ); } // ---------- main main(int argc, char *argv[]) { int mode=-1; if (argc==2) switch (toupper(argv[1][0])) { case 'P': mode=2; break; case 'C': mode=0; break; case 'G': mode=1; break; } if (mode==-1) { cout << "Usage: "<> G; G.reduce(); G.print(cout, mode); }