// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

#ifdef __GNUG__
#pragma implementation
#endif
#include "splib.h"
#include "ParserState.h"
#include "InternalInputSource.h"
#include "MessageArg.h"
#include "macros.h"
#include "SgmlParser.h"
#include "IListIter.h"
#include "ParserMessages.h"
#include "Undo.h"
#include "Trie.h"

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

const Location ParserState::nullLocation_;
sig_atomic_t ParserState::dummyCancel_ = 0;

static const size_t eventSizes[] = {
#define EVENT(c, f) sizeof(c),
#include "events.h"
#undef EVENT
};

static const size_t internalSizes[] = {
  sizeof(InternalInputSource),
  sizeof(OpenElement),
  sizeof(UndoStartTag),
  sizeof(UndoEndTag),
  sizeof(UndoTransition)
};

static
size_t maxSize(const size_t *v, size_t n, size_t max = 0)
{
  for (size_t i = 0; i < n; i++) {
    if (v[i] > max)
      max = v[i];
  }
  return max;
}

ParserState::ParserState(const Ptr<EntityManager> &em,
			 const ParserOptions &opt,
			 unsigned subdocLevel,
			 Phase finalPhase)
: entityManager_(em),
  options_(opt),
  inInstance_(0),
  keepingMessages_(0),
  eventAllocator_(maxSize(eventSizes, SIZEOF(eventSizes)), 50),
  internalAllocator_(maxSize(internalSizes, SIZEOF(internalSizes), EntityOrigin::allocSize), 50),
  handler_(&eventQueue_),
  subdocLevel_(subdocLevel),
  inputLevel_(0),
  specialParseInputLevel_(0),
  markedSectionLevel_(0),
  markedSectionSpecialLevel_(0),
  currentMode_(proMode),
  hadLpd_(0),
  resultAttributeSpecMode_(0),
  pass2_(0),
  activeLinkTypesSubsted_(0),
  allowPass2_(0),
  hadPass2Start_(0),
  pcdataRecovering_(0),
  currentMarkup_(0),
  cancelPtr_(&dummyCancel_),
  finalPhase_(finalPhase),
  hadAfdrDecl_(0)
{
}

void ParserState::inheritActiveLinkTypes(const ParserState &parent)
{
  activeLinkTypes_ = parent.activeLinkTypes_;
  activeLinkTypesSubsted_ = parent.activeLinkTypesSubsted_;
}

void ParserState::allDone()
{
  phase_ = noPhase;
}

void ParserState::setPass2Start()
{
  ASSERT(inputLevel_ == 1);
  if (hadPass2Start_)
    return;
  hadPass2Start_ = 1;
  if (!pass2() && sd().link() && activeLinkTypes_.size() > 0) {
    allowPass2_ = 1;
    pass1Handler_.init(handler_);
    handler_ = &pass1Handler_;
    const InputSourceOrigin *p
      = currentLocation().origin()->asInputSourceOrigin();
    pass2StartOffset_= p->startOffset(currentLocation().index());
  }
  else {
    allowPass2_ = 0;
    currentInput()->willNotRewind();
  }
}

void ParserState::allLinkTypesActivated()
{
  if (activeLinkTypes_.size() == 0 && inputLevel_ == 1)
    currentInput()->willNotRewind();
}

Boolean ParserState::maybeStartPass2()
{
  if (pass2_ || !allowPass2_)
    return 0;
  handler_ = pass1Handler_.origHandler();
  if (!nActiveLink() || pass1Handler_.hadError()) {
    while (!pass1Handler_.empty()) {
      if (cancelled())
	return 0;
      pass1Handler_.get()->handle(*handler_);
    }
    InputSource *top = 0;
    for (IListIter<InputSource> iter(inputStack_);
	 !iter.done();
	 iter.next())
      top = iter.cur();
    if (top)
      top->willNotRewind();
    return 0;
  }
  pass1Handler_.clear();
  while (inputLevel_ > 1) {
    InputSource *p = inputStack_.get();
    inputLevel_--;
    delete p;
  }
  // Caller will call allDone() if inputLevel_ is 0.
  if (inputLevel_ == 0)
    return 0;
  if (!inputStack_.head()->rewind(*this)) {
    inputLevel_ = 0;
    delete inputStack_.get();
    return 0;
  }
  inputStack_.head()->willNotRewind();
  for (; pass2StartOffset_ > 0; pass2StartOffset_--)
    if (inputStack_.head()->get(messenger()) == InputSource::eE) {
      message(ParserMessages::pass2Ee);
      inputLevel_ = 0;
      delete inputStack_.get();
      return 0;
    }
  specialParseInputLevel_ = 0;
  markedSectionLevel_ = 0;
  markedSectionSpecialLevel_ = 0;
  currentMode_ = proMode;
  hadLpd_ = 0;
  allowPass2_ = 0;
  hadPass2Start_ = 0;
  currentMarkup_ = 0;
  inputLevel_ = 1;
  inInstance_ = 0;
  defDtd_.clear();
  defLpd_.clear();
  dtd_[0].swap(pass1Dtd_);
  dtd_.clear();
  dsEntity_.clear();
  currentDtd_.clear();
  currentDtdConst_.clear();
  phase_ = noPhase;
  pass2_ = 1;
  lpd_.clear();
  allLpd_.clear();
  return 1;
}

Boolean ParserState::referenceDsEntity(const Location &loc)
{
  if (dsEntity_.isNull())
    return 0;
  Ptr<EntityOrigin> origin
    = EntityOrigin::make(internalAllocator(), dsEntity_, loc);
  dsEntity_->dsReference(*this, origin);
  dsEntity_.clear();
  return inputLevel() > 1;
}

void ParserState::startDtd(const StringC &name)
{
  defDtd_ = new Dtd(name, dtd_.size() == 0);
  defLpd_.clear();
  for (size_t i = 0; i < options().includes.size(); i++) {
    StringC name = options().includes[i];
    const SubstTable<Char> *subst = syntax().entitySubstTable();
    for (size_t j = 0; j < name.size(); j++)
      subst->subst(name[j]);
    Text text;
    text.addChars(syntax().reservedName(Syntax::rINCLUDE), Location());
    Entity *entity
      = new InternalTextEntity(name,
			       Entity::parameterEntity,
			       Location(),
			       text,
			       InternalTextEntity::none);
    entity->setUsed();
    defDtd_->insertEntity(entity);
  }
  size_t nEntities = instanceSyntax_->nEntities();
  for (size_t i = 0; i < nEntities; i++) {
    Text text;
    text.addChar(instanceSyntax_->entityChar(i), Location());
    Entity *entity
      = new InternalCdataEntity(instanceSyntax_->entityName(i),
				Location(),
				text);
    defDtd_->insertEntity(entity);
  }
  currentDtd_ = defDtd_;
  currentDtdConst_ = defDtd_;
  currentMode_ = dsMode;
}

void ParserState::endDtd()
{
  dtd_.push_back(defDtd_);
  defDtd_.clear();
  currentDtd_.clear();
  currentDtdConst_.clear();
  currentMode_ = proMode;
}

void ParserState::startLpd(Ptr<Lpd> &lpd)
{
  defLpd_ = lpd;
  defDtd_ = defLpd_->sourceDtd();
  currentDtd_ = defLpd_->sourceDtd();
  currentDtdConst_ = defLpd_->sourceDtd();
  currentMode_ = dsMode;
}

void ParserState::endLpd()
{
  hadLpd_ = 1;
  if (defLpd_->active())
    lpd_.push_back(defLpd_);
  allLpd_.push_back(defLpd_);
  defLpd_.clear();
  currentDtd_.clear();
  currentDtdConst_.clear();
  currentMode_ = proMode;
}

void ParserState::popInputStack()
{
  ASSERT(inputLevel_ > 0);
  InputSource *p = inputStack_.get();
  inputLevel_--;
  delete p;
  if (specialParseInputLevel_ > 0 && inputLevel_ == specialParseInputLevel_)
    currentMode_ = specialParseMode_;
  if (currentMode_ == dsiMode
      && inputLevel_ == 1
      && markedSectionLevel_ == 0)
    currentMode_ = dsMode;
  if (inputLevelElementIndex_.size())
    inputLevelElementIndex_.resize(inputLevelElementIndex_.size() - 1);
}

void ParserState::setSd(ConstPtr<Sd> sd)
{
  sd_ = sd;
  mayDefaultAttribute_ = (sd_->omittag() || sd_->attributeDefault());
  validate_ = sd_->typeValid();
  implydefElement_ = sd_->implydefElement();
  implydefAttlist_ = sd_->implydefAttlist();
}

void ParserState::setSyntax(ConstPtr<Syntax> syntax)
{
  syntax_ = syntax;
  prologSyntax_ = syntax;
  instanceSyntax_ = syntax;
}

void ParserState::setSyntaxes(ConstPtr<Syntax> prologSyntax,
			      ConstPtr<Syntax> instanceSyntax)
{
  syntax_ = prologSyntax;
  prologSyntax_ = prologSyntax;
  instanceSyntax_ = instanceSyntax;
}

void ParserState::pushInput(InputSource *in)
{
  if (!in)
    return;
  if (!syntax_.isNull() && syntax_->multicode())
    in->setMarkupScanTable(syntax_->markupScanTable());
  inputStack_.insert(in);
  inputLevel_++;
  if (specialParseInputLevel_ > 0 && inputLevel_ > specialParseInputLevel_)
    currentMode_ = rcconeMode;	// mode for rcdata in an entity
  else if (currentMode_ == dsMode)
    currentMode_ = dsiMode;
  if (inInstance_ && sd().integrallyStored())
    inputLevelElementIndex_.push_back(tagLevel() ? currentElement().index() : 0);
}

void ParserState::startMarkedSection(const Location &loc)
{
  markedSectionLevel_++;
  markedSectionStartLocation_.push_back(loc);
  if (currentMode_ == dsMode)
    currentMode_ = dsiMode;
  if (markedSectionSpecialLevel_)
    markedSectionSpecialLevel_++;
}

void ParserState::startSpecialMarkedSection(Mode mode, const Location &loc)
{
  markedSectionLevel_++;
  markedSectionStartLocation_.push_back(loc);
  specialParseInputLevel_ = inputLevel_;
  markedSectionSpecialLevel_ = 1;
  specialParseMode_ = currentMode_ = mode;
}

void ParserState::endMarkedSection()
{
  ASSERT(markedSectionLevel_ > 0);
  markedSectionLevel_--;
  markedSectionStartLocation_.resize(markedSectionStartLocation_.size()
					- 1);
  if (markedSectionSpecialLevel_ > 0) {
    markedSectionSpecialLevel_--;
    if (markedSectionSpecialLevel_ > 0)
      return;			// remain in imsMode
    specialParseInputLevel_ = 0;
    if (inInstance_)
      currentMode_ = contentMode();
    else
      currentMode_ = dsiMode;
  }
  if (currentMode_ == dsiMode
      && inputLevel_ == 1
      && markedSectionLevel_ == 0)
    currentMode_ = dsMode;
}

void ParserState::pushElement(OpenElement *e)
{
  ContentState::pushElement(e);
  pcdataRecovering_ = 0;
  // the start tag of this element may have been implied by data
  // inside a cdata or rcdata marked section
  if (markedSectionSpecialLevel_ == 0) {
    currentMode_ = contentMode();
    if (e->requiresSpecialParse()) {
      specialParseMode_ = currentMode_;
      specialParseInputLevel_ = inputLevel_;
    }
  }
}

// PCDATA was encountered somewhere where it was not allowed.
// Change the current mode to improve recovery.

void ParserState::pcdataRecover()
{
  switch (currentMode_) {
  case econMode:
    currentMode_ = mconMode;
    break;
  case econnetMode:
    currentMode_ = mconnetMode;
    break;
  default:
    break;
  }
  pcdataRecovering_ = 1;
}

OpenElement *ParserState::popSaveElement()
{
  OpenElement *e = ContentState::popSaveElement();
  // the end tag of this element may have been implied by data
  // inside a cdata or rcdata marked section
  if (markedSectionSpecialLevel_ == 0) {
    currentMode_ = contentMode();
    specialParseInputLevel_ = 0;
  }
  pcdataRecovering_ = 0;
  return e;
}

void ParserState::popElement()
{
  delete popSaveElement();
}
			      
Boolean ParserState::entityIsOpen(const EntityDecl *entityDecl) const
{
  for (IListIter<InputSource> iter(inputStack_); !iter.done(); iter.next())
    if (iter.cur()->currentLocation().origin()->entityDecl() == entityDecl)
      return 1;
  return 0;
}

void ParserState::startInstance()
{
  if (!instanceSyntax_.isNull())
    syntax_ = instanceSyntax_;
  currentMode_ = econMode;
  currentDtd_ = dtd_[0];
  currentDtdConst_ = dtd_[0];
  startContent(currentDtd());
  inInstance_ = 1;
  if (sd().rank())
    currentRank_.assign(currentDtd().nRankStem(), StringC());
  currentAttributes_.clear();
  currentAttributes_.resize(currentDtd().nCurrentAttribute());
  idTable_.clear();
}

Id *ParserState::lookupCreateId(const StringC &name)
{
  Id *id = idTable_.lookup(name);
  if (!id) {
    id = new Id(name);
    idTable_.insert(id);
  }
  return id;
}

ConstPtr<Entity>
ParserState::lookupEntity(Boolean isParameter,
			  const StringC &name,
			  const Location &useLocation,
			  Boolean referenced)
{
  Dtd *dtd;
  if (resultAttributeSpecMode_)
    dtd = defComplexLpd().resultDtd().pointer();
  else
    dtd = currentDtd_.pointer();
  if (dtd) {
    Ptr<Entity> entity(dtd->lookupEntity(isParameter, name));
    // Did we find it in pass1Dtd?
    // Did we look at the defaultEntity?
    if (!inInstance_ && pass2() && dtd->isBase()
	&& !resultAttributeSpecMode_
	&& (entity.isNull() || !entity->declInActiveLpd())) {
      ConstPtr<Entity> entity1
	= pass1Dtd_->lookupEntity(isParameter, name);
      if (!entity1.isNull() && entity1->declInActiveLpd()
	  && !entity1->defaulted()) {
	if (referenced)
	  noteReferencedEntity(entity1, 1, 0);
	return entity1;
      }
      else if (!entity.isNull()) {
	if (referenced)
	  noteReferencedEntity(entity, 0, 0);
	entity->setUsed();
	return entity;
      }
    }
    else if (!entity.isNull()) {
      entity->setUsed();
      return entity;
    }
    if (!isParameter) {
      ConstPtr<Entity> entity(dtd->defaultEntity());
      Boolean note = 0;
      Boolean usedPass1 = 0;
      if (!inInstance_ && pass2() && dtd->isBase()
	  && !resultAttributeSpecMode_
	  && (entity.isNull() || !entity->declInActiveLpd())) {
	if (referenced)
	  note = 1;
	ConstPtr<Entity> entity1 = pass1Dtd_->defaultEntity();
	if (!entity1.isNull() && entity1->declInActiveLpd()) {
	  usedPass1 = 1;
	  entity = entity1;
	}
      }
      if (!entity.isNull()) {
	Boolean mustCopy = 1;
	if (inInstance_) {
	  ConstPtr<Entity> tem 
	    = instanceDefaultedEntityTable_.lookupConst(name);
	  if (!tem.isNull()) {
	    entity = tem;
	    mustCopy = 0;
	  }
	}
	if (mustCopy) {
	  Ptr<Entity> p(entity->copy());
	  p->setName(name);
	  p->generateSystemId(*this);
	  p->setDefaulted();
	  entity = p;
	  if (inInstance_) {
	    instanceDefaultedEntityTable_.insert(p);
	    eventHandler().entityDefaulted(new (eventAllocator())
					   EntityDefaultedEvent(entity,
								useLocation));
	  }
	  else
	    dtd->insertEntity(p);
	}
	if (note)
	  noteReferencedEntity(entity, usedPass1, 1);
      }
      else
	entity = undefinedEntityTable_.lookupConst(name);
      return entity;
    }
  }
  return (Entity *)0;
}

ConstPtr<Entity> ParserState::createUndefinedEntity(const StringC &name, const Location &loc)
{
  Text text;
  Ptr<Entity> entity(new InternalCdataEntity(name, loc, text));
  undefinedEntityTable_.insert(entity);
  return entity;
}

void ParserState::noteReferencedEntity(const ConstPtr<Entity> &entity,
				       Boolean foundInPass1Dtd,
				       Boolean lookedAtDefault)
{
  LpdEntityRef ref;
  ref.entity = entity;
  ref.lookedAtDefault = lookedAtDefault;
  ref.foundInPass1Dtd = foundInPass1Dtd;
  LpdEntityRef *old = lpdEntityRefs_.lookup(ref);
  if (!old)
    lpdEntityRefs_.insert(new LpdEntityRef(ref));
}

// Compare entity definitions.
// e1 is the original (will not be an external non-text entity).
// FIXME should look at generated sysids as well.
static
Boolean sameEntityDef(const Entity *e1, const Entity *e2)
{
  if (e1->dataType() != e2->dataType())
    return 0;
  const InternalEntity *i1 = e1->asInternalEntity();
  const InternalEntity *i2 = e2->asInternalEntity();
  if (i1) {
    if (!i2)
      return 0;
    if (i1->string() != i2->string())
      return 0;
    return 1;
  }
  else if (i2)
    return 0;
  const ExternalEntity *x1 = e1->asExternalEntity();
  const ExternalEntity *x2 = e2->asExternalEntity();
  const StringC *s1 = x1->externalId().systemIdString();
  const StringC *s2 = x2->externalId().systemIdString();
  if (s1) {
    if (!s2)
      return 0;
    if (*s1 != *s2)
      return 0;
  }
  else if (s2)
    return 0;
  s1 = x1->externalId().publicIdString();
  s2 = x2->externalId().publicIdString();
  if (s1) {
    if (!s2)
      return 0;
    if (*s1 != *s2)
      return 0;
  }
  else if (s2)
    return 0;
  return 1;
}

void ParserState::checkEntityStability()
{
  LpdEntityRefSetIter iter(lpdEntityRefs_);
  LpdEntityRef *ref;
  while ((ref = iter.next()) != 0) {
    ConstPtr<Entity> entity
      = dtd_[0]->lookupEntity(ref->entity->declType()
			      == Entity::parameterEntity,
			      ref->entity->name());
    if (entity.isNull() && ref->lookedAtDefault)
      entity = dtd_[0]->defaultEntity();
    if (entity.isNull()
	? ref->foundInPass1Dtd
	: !sameEntityDef(ref->entity.pointer(), entity.pointer()))
      message(((ref->entity->declType()
		== Entity::parameterEntity)
	       ? ParserMessages::unstableLpdParameterEntity
	       : ParserMessages::unstableLpdGeneralEntity),
	      StringMessageArg(ref->entity->name()));
  }
  {
    // Ensure that the memory is released.
    LpdEntityRefSet tem;
    lpdEntityRefs_.swap(tem);
  }
}    

Boolean ParserState::appendCurrentRank(StringC &str, const RankStem *stem)
     const
{
  const StringC &suffix = currentRank_[stem->index()];
  if (suffix.size() > 0) {
    str += suffix;
    return 1;
  }
  return 0;
}

void ParserState::setCurrentRank(const RankStem *stem, const StringC &suffix)
{
  currentRank_[stem->index()] = suffix;
}

void ParserState::getCurrentToken(const SubstTable<Char> *subst,
				  StringC &str) const
{
  InputSource *in = currentInput();
  const Char *p = in->currentTokenStart();
  size_t count = in->currentTokenLength();
  str.resize(count);
  StringC::iterator s = str.begin();
  for (; count > 0; --count)
    *s++ = (*subst)[*p++];
}

void ParserState::queueMessage(MessageEvent *event)
{
  if (cancelled()) {
    delete event;
    return;
  }
  if (keepingMessages_)
    keptMessages_.append(event);
  else
    handler_->message(event);
}

void ParserState::releaseKeptMessages()
{
  keepingMessages_ = 0;
  while (!keptMessages_.empty()) {
    if (cancelled()) {
      allDone();
      return;
    }
    handler_->message(keptMessages_.get());
  }
}

void ParserState::discardKeptMessages()
{
  keepingMessages_ = 0;
  keptMessages_.clear();
}

void ParserState::initMessage(Message &msg)
{
  if (inInstance()) {
    StringC rniPcdata = syntax().delimGeneral(Syntax::dRNI);
    rniPcdata += syntax().reservedName(Syntax::rPCDATA);
    getOpenElementInfo(msg.openElementInfo, rniPcdata);
  }
  msg.loc = currentLocation();
}

void ParserState::dispatchMessage(Message &msg)
{
  queueMessage(new MessageEvent(msg));
}

void ParserState::dispatchMessage(const Message &msg)
{
  queueMessage(new MessageEvent(msg));
}

AttributeList *
ParserState::allocAttributeList(const ConstPtr<AttributeDefinitionList> &def,
				unsigned i)
{
  if (i < attributeLists_.size())
    attributeLists_[i]->init(def);
  else {
    attributeLists_.resize(i + 1);
    attributeLists_[i] = new AttributeList(def);
  }
  return attributeLists_[i].pointer();
}

void ParserState::activateLinkType(const StringC &name)
{
  if (!hadPass2Start_ && !pass2_)
    activeLinkTypes_.push_back(name);
  else
    message(ParserMessages::linkActivateTooLate);
}

Boolean ParserState::shouldActivateLink(const StringC &name) const
{
  if (!activeLinkTypesSubsted_) {
    // FIXME use mutable
    ParserState *state = (ParserState *)this;
    for (size_t i = 0; i < activeLinkTypes_.size(); i++)
      for (size_t j = 0; j < activeLinkTypes_[i].size(); j++)
	syntax().generalSubstTable()->subst(state->activeLinkTypes_[i][j]);
    state->activeLinkTypesSubsted_ = 1;
  }
  for (size_t i = 0; i < activeLinkTypes_.size(); i++)
    if (name == activeLinkTypes_[i])
      return 1;
  return 0;
}

Ptr<Dtd> ParserState::lookupDtd(const StringC &name)
{
  for (size_t i = 0; i < dtd_.size(); i++)
    if (dtd_[i]->name() == name)
      return dtd_[i];
  return Ptr<Dtd>();
}

ConstPtr<Lpd> ParserState::lookupLpd(const StringC &name) const
{
  for (size_t i = 0; i < allLpd_.size(); i++)
    if (allLpd_[i]->name() == name)
      return allLpd_[i];
  return ConstPtr<Lpd>();
}

ConstPtr<Notation> ParserState::getAttributeNotation(const StringC &name,
						     const Location &)
{
  ConstPtr<Notation> notation;
  if (haveCurrentDtd())
    notation = currentDtd().lookupNotation(name);
  else if (resultAttributeSpecMode_) {
    const Dtd *resultDtd = defComplexLpd().resultDtd().pointer();
    if (!resultDtd)
      return 0;
    notation = resultDtd->lookupNotation(name);
  }
  return notation; 
}

ConstPtr<Entity> ParserState::getAttributeEntity(const StringC &str,
						 const Location &loc)
{
  ConstPtr<Entity> entity = lookupEntity(0, str, loc, 0);
  if (!entity.isNull()
      && entity->defaulted()
      && options().warnDefaultEntityReference) {
    setNextLocation(loc);
    message(ParserMessages::defaultEntityInAttribute,
	    StringMessageArg(str));
  }
  return entity;
}

Boolean ParserState::defineId(const StringC &str, const Location &loc,
			      Location &prevLoc)
{
  if (!inInstance() || !validate())
    return 1;
  Id *id = lookupCreateId(str);
  if (id->defined()) {
    prevLoc = id->defLocation();
    return 0;
  }
  id->define(loc);
  return 1;
}

void ParserState::noteIdref(const StringC &str, const Location &loc)
{
  if (!inInstance() || !options().errorIdref || !validate())
    return;
  Id *id = lookupCreateId(str);
  if (!id->defined())
    id->addPendingRef(loc);
}

void ParserState::noteCurrentAttribute(size_t i, AttributeValue *value)
{
  if (inInstance())
    currentAttributes_[i] = value;
}

ConstPtr<AttributeValue> ParserState::getCurrentAttribute(size_t i) const
{
  if (!inInstance())
    return ConstPtr<AttributeValue>();
  return currentAttributes_[i];
}

const Syntax &ParserState::attributeSyntax() const
{
  return syntax();
}

#ifdef SP_NAMESPACE
}
#endif
