不确定有穷自动机正则表达式求值

    技术2024-08-19  72

    #include <algorithm> #include <cctype> #include <cstddef> #include <cstring> #include <deque> #include <iostream> #include <queue> #include <stack> #include <utility> #include <vector>   template <size_t _MaxStat> class _state_extractor { public:     typedef unsigned state_type;       class state_overflow {};       _state_extractor():_next_m(0) {}       state_type get() {     if(!_reclaimer_m.empty()) {         state_type _x=_reclaimer_m.front();         _reclaimer_m.pop();         return _x;     }     if(_next_m==_MaxStat) throw state_overflow();     return _next_m++;     }       void get_back(state_type _x) {     _reclaimer_m.push(_x);     }   private:     std::queue<state_type> _reclaimer_m;     unsigned _next_m;   };   template <size_t _MaxStat> class regular_expression { public:     typedef unsigned state_type;     typedef int trans_type;     typedef std::vector<state_type> nfa[_MaxStat][256];     typedef _state_extractor<_MaxStat> state_extractor;       enum {empty};       virtual ~regular_expression() {}       state_type get_begstat() const {     return _begin_m;     }       state_type get_endstat() const {     return _end_m;     }       std::vector<std::vector<state_type> *> &get_endptr_set() {     return _endptr_set_m;     }       void set_begstat(nfa _nfa, state_type _stat) {     for(int _x=0; _x<256; ++_x)         if(!_nfa[_begin_m][_x].empty()) {         _nfa[_stat][_x]=_nfa[_begin_m][_x];         _nfa[_begin_m][_x].clear();         }     _begin_m=_stat;     }       void set_endstat(nfa _nfa, state_type _stat) {     for(std::vector<std::vector<state_type> *>::const_iterator _i=_endptr_set_m.begin(); _i!=_endptr_set_m.end(); ++_i) {         (*_i)->erase(std::find((*_i)->begin(), (*_i)->end(), _end_m));         (*_i)->push_back(_stat);     }     _end_m=_stat;     }   protected:     regular_expression(state_type _begstat, state_type _endstat)     :_begin_m(_begstat)     ,_end_m(_endstat) {}       state_type _begin_m, _end_m;     std::vector<std::vector<state_type> *> _endptr_set_m;   private:     regular_expression(const regular_expression &);   };   template <size_t _MaxStat> class alpha:public regular_expression<_MaxStat> {     typedef regular_expression<_MaxStat> _base_type;   public:     alpha(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, typename _base_type::trans_type _trans)     :_base_type(_se.get(), _se.get()) {     _nfa[this->_begin_m][_trans].push_back(this->_end_m);     this->_endptr_set_m.push_back(&_nfa[this->_begin_m][_trans]);     }   };   template <size_t _MaxStat> class _and:public regular_expression<_MaxStat> {     typedef regular_expression<_MaxStat> _base_type;   public:     _and(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_left, _base_type *_right)     :_base_type(_left->get_begstat(), _right->get_endstat()) {     _se.get_back(_left->get_endstat());     _left->set_endstat(_nfa, _right->get_begstat());     this->_endptr_set_m=_right->get_endptr_set();     }       ~_and() {     delete _left_m;     delete _right_m;     }   private:     _base_type *_left_m, *_right_m;   };   template <size_t _MaxStat> class _or:public regular_expression<_MaxStat> {     typedef regular_expression<_MaxStat> _base_type;   public:     _or(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_left, _base_type *_right)     :_base_type(_se.get(), _se.get()), _left_m(_left), _right_m(_right) {     _nfa[this->_begin_m][_base_type::empty].push_back(_left_m->get_begstat());     _nfa[this->_begin_m][_base_type::empty].push_back(_right_m->get_begstat());     _nfa[_left_m->get_endstat()][_base_type::empty].push_back(this->_end_m);     _nfa[_right_m->get_endstat()][_base_type::empty].push_back(this->_end_m);     this->_endptr_set_m.push_back(&_nfa[_left_m->get_endstat()][_base_type::empty]);     this->_endptr_set_m.push_back(&_nfa[_right_m->get_endstat()][_base_type::empty]);     }       ~_or() {     delete _left_m;     delete _right_m;     }   private:     _base_type *_left_m, *_right_m;   };   template <size_t _MaxStat> class closure:public regular_expression<_MaxStat> {     typedef regular_expression<_MaxStat> _base_type;   public:     closure(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_original)     :_base_type(_se.get(), _se.get()), _original_m(_original) {     _nfa[this->_begin_m][_base_type::empty].push_back(_original_m->get_begstat());     _nfa[this->_begin_m][_base_type::empty].push_back(this->_end_m);     _nfa[_original_m->get_endstat()][_base_type::empty].push_back(_original_m->get_begstat());     _nfa[_original_m->get_endstat()][_base_type::empty].push_back(this->_end_m);     this->_endptr_set_m.push_back(&_nfa[_original_m->get_endstat()][_base_type::empty]);     }       ~closure() {     delete _original_m;     }   private:     _base_type *_original_m;   };   class lexer { public:     class bad_token {};       lexer(const std::string &_src):_src_m(_src), _iter_m(_src_m.begin()) {}       bool is_already_over() {     leave_out_space();     return _iter_m==_src_m.end();     }       lexer &operator>>(char &_c) {     leave_out_space();     if(!isalnum(*_iter_m)&&*_iter_m!='|'&&*_iter_m!='('&&*_iter_m!=')'&&*_iter_m!='*') throw bad_token();     _c=*_iter_m++;     return *this;     }   private:     void leave_out_space() {     std::string::const_iterator _enditer;     while(_iter_m!=_enditer&&isspace(*_iter_m)) ++_iter_m;     }       std::string _src_m;     std::string::const_iterator _iter_m;   };   class post_regular_expression_factor { public:     std::vector<char> generic(const std::string &_src) const {     std::vector<char> _result;     std::stack<char> _operator_stack;     lexer _lxr(_src);     char _c;     while(!_lxr.is_already_over()) {         _lxr>>_c;         switch(_c) {         case '(':         _operator_stack.push('(');         break;         case ')':         while(_operator_stack.top()!='(') {             _result.push_back(_operator_stack.top());             _operator_stack.pop();         }         _operator_stack.pop();         break;         case '*':         _result.push_back('*');         break;         case '|':         while(!_operator_stack.empty()&&_operator_stack.top()!='(') {             _result.push_back(_operator_stack.top());             _operator_stack.pop();         }         _operator_stack.push('|');         break;         default:         _result.push_back(_c);         }     }     while(!_operator_stack.empty()) {         _result.push_back(_operator_stack.top());         _operator_stack.pop();     }     return _result;     }   };   class regular_expression_nfa_factor { public:     template <size_t _MaxStat>     std::pair<         typename regular_expression<_MaxStat>::state_type,         typename regular_expression<_MaxStat>::state_type> generic(typename regular_expression<_MaxStat>::nfa _nfa, const std::vector<char> &_vec) const {     typename regular_expression<_MaxStat>::state_extractor _se;     std::deque<regular_expression<_MaxStat> *> _stack;     for(std::vector<char>::const_iterator _i=_vec.begin(); _i!=_vec.end(); ++_i) {         if(isalnum(*_i))         _stack.push_back(new alpha<_MaxStat>(_nfa, _se, *_i));         else if(*_i=='*')         _stack.back()=new closure<_MaxStat>(_nfa, _se, _stack.back());         else if(*_i=='|') {         regular_expression<_MaxStat> *_second=_stack.back();         _stack.pop_back();         regular_expression<_MaxStat> *_first=_stack.back();         _stack.pop_back();         _stack.push_back(new _or<_MaxStat>(_nfa, _se, _first, _second));         }     }     while(_stack.size()>=2) {         regular_expression<_MaxStat> *_first=_stack.front();         _stack.pop_front();         regular_expression<_MaxStat> *_second=_stack.front();         _stack.pop_front();         _stack.push_front(new _and<_MaxStat>(_nfa, _se, _first, _second));     }     std::pair<typename regular_expression<_MaxStat>::state_type,           typename regular_expression<_MaxStat>::state_type> _result(_stack.back()->get_begstat(),                                          _stack.back()->get_endstat());     delete _stack.back();     return _result;     }   };     class regular_expression_matcher { public:     template <size_t _MaxStat>     bool match(const char *_srcstr, const char *_patstr) {     post_regular_expression_factor _pref;     std::vector<char> _vec=_pref.generic(_srcstr);     typename regular_expression<_MaxStat>::nfa _nfa;     regular_expression_nfa_factor _renf;     std::pair<typename regular_expression<_MaxStat>::state_type,           typename regular_expression<_MaxStat>::state_type> _r=_renf.generic<_MaxStat>(_nfa, _vec);     return match_impl<_MaxStat>(_nfa, _r.first, _r.second, _patstr);     }   private:     template <size_t _MaxStat>     bool match_impl(typename regular_expression<_MaxStat>::nfa _nfa,             typename regular_expression<_MaxStat>::state_type _beg,             typename regular_expression<_MaxStat>::state_type _end,             const char *_patstr) {     static bool _state_map[_MaxStat];     memset(_state_map, 0, sizeof(bool)*_MaxStat);     std::vector<typename regular_expression<_MaxStat>::state_type> _old_states;     std::vector<typename regular_expression<_MaxStat>::state_type> _new_states;     match_impl_aux<_MaxStat>(_nfa, _beg, _old_states, _state_map);     for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _i=_old_states.begin(); _i!=_old_states.end(); ++_i)         _state_map[*_i]=false;     for(; *_patstr; ++_patstr) {         for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::iterator _i=_old_states.begin(); _i!=_old_states.end(); ) {         for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _j=_nfa[*_i][*_patstr].begin(); _j!=_nfa[*_i][*_patstr].end(); ++_j)             match_impl_aux<_MaxStat>(_nfa, *_j, _new_states, _state_map);         _i=_old_states.erase(_i);         }         for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::iterator _i=_new_states.begin(); _i!=_new_states.end(); ) {         _old_states.push_back(*_i);         _state_map[*_i]=false;         _i=_new_states.erase(_i);         }     }     return std::find(_old_states.begin(), _old_states.end(), _end)!=_old_states.end();     }       template <size_t _MaxStat>     void match_impl_aux(typename regular_expression<_MaxStat>::nfa _nfa,             typename regular_expression<_MaxStat>::state_type _cur,             std::vector<typename regular_expression<_MaxStat>::state_type> &_states,             bool _state_map[_MaxStat]) {     _states.push_back(_cur);     _state_map[_cur]=true;     for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _i=_nfa[_cur][regular_expression<_MaxStat>::empty].begin(); _i!=_nfa[_cur][regular_expression<_MaxStat>::empty].end(); ++_i)         if(!_state_map[*_i])         match_impl_aux<_MaxStat>(_nfa, *_i, _states, _state_map);     }   };

    最新回复(0)