/*********************                                                        */
/*! \file regexp_operation.h
 ** \verbatim
 ** Top contributors (to current version):
 **   Andrew Reynolds, Tianyi Liang, Andres Noetzli
 ** This file is part of the CVC4 project.
 ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS
 ** in the top-level source directory) and their institutional affiliations.
 ** All rights reserved.  See the file COPYING in the top-level source
 ** directory for licensing information.\endverbatim
 **
 ** \brief Symbolic Regular Expresion Operations
 **
 ** Symbolic Regular Expression Operations
 **/

#include "cvc4_private.h"

#ifndef CVC4__THEORY__STRINGS__REGEXP__OPERATION_H
#define CVC4__THEORY__STRINGS__REGEXP__OPERATION_H

#include <vector>
#include <set>
#include <algorithm>
#include <climits>
#include "util/hash.h"
#include "util/string.h"
#include "theory/theory.h"
#include "theory/rewriter.h"

namespace CVC4 {
namespace theory {
namespace strings {

/**
 * Information on whether regular expressions contain constants or re.allchar.
 *
 * The order of this enumeration matters: the larger the value, the more
 * possible regular expressions could fit the description.
 */
enum RegExpConstType
{
  // the regular expression doesn't contain variables or re.comp,
  // re.allchar or re.range (call these three operators "non-concrete
  // operators"). Notice that re.comp is a non-concrete operator
  // since it can be seen as indirectly defined in terms of re.allchar.
  RE_C_CONRETE_CONSTANT,
  // the regular expression doesn't contain variables, but may contain
  // re.comp, re.allchar or re.range
  RE_C_CONSTANT,
  // the regular expression may contain variables
  RE_C_VARIABLE,
  // the status of the regular expression is unknown (used internally)
  RE_C_UNKNOWN,
};

class RegExpOpr {
  typedef std::pair< Node, CVC4::String > PairNodeStr;
  typedef std::set< Node > SetNodes;
  typedef std::pair< Node, Node > PairNodes;

 private:
  /** the code point of the last character in the alphabet we are using */
  uint32_t d_lastchar;
  Node d_emptyString;
  Node d_true;
  Node d_false;
  Node d_emptySingleton;
  Node d_emptyRegexp;
  Node d_zero;
  Node d_one;

  Node d_sigma;
  Node d_sigma_star;

  std::map<PairNodes, Node> d_simpl_cache;
  std::map<PairNodes, Node> d_simpl_neg_cache;
  std::map<Node, std::pair<int, Node> > d_delta_cache;
  std::map<PairNodeStr, Node> d_dv_cache;
  std::map<PairNodeStr, std::pair<Node, int> > d_deriv_cache;
  std::map<Node, std::pair<Node, int> > d_compl_cache;
  /** cache mapping regular expressions to whether they contain constants */
  std::unordered_map<Node, RegExpConstType, NodeHashFunction> d_constCache;
  std::map<Node, std::pair<std::set<unsigned>, std::set<Node> > > d_cset_cache;
  std::map<Node, std::pair<std::set<unsigned>, std::set<Node> > > d_fset_cache;
  std::map<PairNodes, Node> d_inter_cache;
  std::map<Node, Node> d_rm_inter_cache;
  std::map<Node, bool> d_norv_cache;
  std::map<Node, std::vector<PairNodes> > d_split_cache;
  std::map<PairNodes, bool> d_inclusionCache;
  void simplifyPRegExp(Node s, Node r, std::vector<Node> &new_nodes);
  void simplifyNRegExp(Node s, Node r, std::vector<Node> &new_nodes);
  /**
   * Helper function for mkString, pretty prints constant or variable regular
   * expression r.
   */
  static std::string niceChar(Node r);
  Node mkAllExceptOne(unsigned c);
  bool isPairNodesInSet(std::set<PairNodes> &s, Node n1, Node n2);

  bool containC2(unsigned cnt, Node n);
  Node convert1(unsigned cnt, Node n);
  void convert2(unsigned cnt, Node n, Node &r1, Node &r2);
  bool testNoRV(Node r);
  Node intersectInternal(Node r1,
                         Node r2,
                         std::map<PairNodes, Node> cache,
                         unsigned cnt);
  Node removeIntersection(Node r);
  void firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset);

 public:
  RegExpOpr();
  ~RegExpOpr();

  /**
   * Returns true if r is a "constant" regular expression, that is, a set
   * of regular expression operators whose subterms of the form (str.to.re t)
   * are such that t is a constant (or rewrites to one).
   */
  bool checkConstRegExp( Node r );
  /** get the constant type for regular expression r */
  RegExpConstType getRegExpConstType(Node r);
  void simplify(Node t, std::vector< Node > &new_nodes, bool polarity);
  /**
   * This method returns 1 if the empty string is in r, 2 if the empty string
   * is not in r, or 0 if it is unknown whether the empty string is in r.
   * TODO (project #2): refactor the return value of this function.
   *
   * If this method returns 0, then exp is updated to an explanation that
   * would imply that the empty string is in r.
   *
   * For example,
   * - delta( (re.inter (str.to.re x) (re.* "A")) ) returns 0 and sets exp to
   * x = "",
   * - delta( (re.++ (str.to.re "A") R) ) returns 2,
   * - delta( (re.union (re.* "A") R) ) returns 1.
   */
  int delta( Node r, Node &exp );
  int derivativeS( Node r, CVC4::String c, Node &retNode );
  Node derivativeSingle( Node r, CVC4::String c );
  /**
   * Returns the regular expression intersection of r1 and r2. If r1 or r2 is
   * not constant, then this method returns null and sets spflag to true.
   */
  Node intersect(Node r1, Node r2, bool &spflag);
  /** Get the pretty printed version of the regular expression r */
  static std::string mkString(Node r);

  /**
   * Returns true if we can show that the regular expression `r1` includes
   * the regular expression `r2` (i.e. `r1` matches a superset of sequences
   * that `r2` matches). See documentation in RegExpEntail::regExpIncludes for
   * more details. This call caches the result (which is context-independent),
   * for performance reasons.
   */
  bool regExpIncludes(Node r1, Node r2);
};

}/* CVC4::theory::strings namespace */
}/* CVC4::theory namespace */
}/* CVC4 namespace */

#endif /* CVC4__THEORY__STRINGS__REGEXP__OPERATION_H */
