<?php

  // Common tokens:
  define('Uwacko_TildeToken', '~');       // must contain no PCRE-special chars.
  define('Uwacko_SubstrEscToken', '""');  // must contain no PCRE-special chars.
  define('Uwacko_CompoundTokenSepar', '==');  // separates the unparsed first part of a compound tag, e.g. ((http://==link)).
  define('Uwacko_ForcedLineBreakToken', '---');
  define('Uwacko_LinkEndingSeparChar', '+');  // e.g.: See the ((standard+s)).
  define('Uwacko_AnchorChar', '#');      // must be 1 char long.
  define('Uwacko_FootnoteChar', '*');    // must be 1 char long.
  define('Uwacko_MaxFootnoteChars', 9);  // max allowed length of ((******)), in chars.
  define('Uwacko_DeletePos', null);    // stored as item [0] of a $positions entry in place of the token, see Tokenize().

  // Element properties (bit flags - combined with | and tested with & in Tokenize()):
  define('Uwacko_StartTag', 1);          // can be combined with EndTag.
  define('Uwacko_EndTag', 2);            // all combinations of Start/EndTag are valid (<none>, s, e, se).
  define('Uwacko_SingleTag', 4);         // tag doesn't have an end tag; similar to XML's <tag />.
  define('Uwacko_EscTagAfterSelf', 8);   // tilde (~) - escapes one token if it's located right after ~, otherwise ~ is output as is.
  define('Uwacko_DoesNotDivide', 16);    // no node is created, tag's contents are appended to the last node.
  define('Uwacko_Callback', 32);         // TagClass::FindTokenCallback() is called each time the tag is encountered.
  define('Uwacko_NoTokensInside', 64);   // don't parse the contents; used in %%.
  define('Uwacko_NoTokensForFirstPart', 128);   // don't parse contents before (==|\s) - e.g. ((http://==link)).
  define('Uwacko_DiscardOnDiffToken', 256);     // if the tag was opened but is still unclosed when EOF is reached it's discarded instead of reparsing.
                                                // additionally it's discarded if the token that follows it isn't its ending token.
  define('Uwacko_DeleteToken', 512);     // token will be removed from source (is used in ~ and "").

  // Special return values of FindTokenCallback:
  define('Uwacko_SkipThisToken', 1);       // Tokenize() ignores the token and moves on to the next match.
  define('Uwacko_ForceCloseCurrent', 2);   // Tokenize() treats the token as the end tag of the currently opened tag.

  // Pseudo class names used in handlers (compared against handler's class in Tokenize()):
  define('Uwacko_OneTokenEscape', 1);    // it's tilde (~).
  define('Uwacko_SubstrEscape', 2);      // ""...""

abstract class Uwacko_Base extends UWikiBaseElement {
  public $anchorClass = 'Uwacko_Anchor';
  // ~/"" should only be removed in last leafs (e.g. text) or they won't affect deeper tokens.
  public $removeEscInside = false;

  // Tokens that opened/closed this element and the data returned by its FindTokenCallback();
  // all three are assigned by Uwacko_Tokenizer::Divide().
  public $startToken;
  public $endToken;
  public $callbackResult;

  // Hook invoked by the tokenizer for handlers that have the Uwacko_Callback flag.
  // Can return special values (see Uwacko_* consts) or an array() with user data
  // to store in $this->callbackResult. The default implementation returns nothing.
  static function FindTokenCallback($doc, &$raw, &$positions, &$stack, &$token, &$pos, &$flags) { }

  // Creates a UWikiFormat for this element out of $formats (falling back to the
  // element's DefaultStyle() when $formats is empty), runs its immediate formats
  // and registers it on the document root.
  function SetupFormats(&$formats, &$raw) {
    if (!$formats) {
      $formats = array( array(null => $this->DefaultStyle()) );
    }

    $format = new UWikiFormat($this, $raw);
    $format->onError = array(null => 'pre');
    $format->AddFormats($formats);
    $format->RunImmediateFormats();

    $this->doc->root->formats[] = $format;
  }

  // Backslash-escapes PCRE-special chars in $str; unlike preg_quote() it escapes
  // '#' which is a comment in /x mode.
  // NOTE(review): whitespace is not escaped although /x mode ignores it in patterns - confirm
  // callers never pass strings containing whitespace.
  static function PregQuoteX($str) {
    $specialChars = '.\\/+*?[^]$(){}=!<>|:-#';
    return addcslashes($str, $specialChars);
  }
}

// Text leaf element: declares the same tokenizer-related properties as Uwacko_Base
// but, unlike it, has escape tokens (~ and "") removed from its raw text.
class Uwacko_Text extends Utext_Root {
  public $htmlTag = null;
  public $htmlClasses = array();
  public $anchorClass = 'Uwacko_Anchor';
  // true makes Uwacko_Tokenizer::DeleteIn() strip escape tokens from this element's raw text.
  public $removeEscInside = true;

  // Assigned by Uwacko_Tokenizer::Divide().
  public $startToken;
  public $endToken;
}

abstract class Uwacko_Tokenizer extends Uwacko_Base {
  public $handlerGroup;
  public $defaultTokenClass;

  // Tokenizes this element's raw text with the handlers of $this->handlerGroup
  // and divides it into child elements. The text is wrapped in a leading and a
  // trailing "\n" for the duration of parsing (after trimming line breaks).
  function Parse() {
    parent::Parse();
    $this->children = array();

    $padded = "\n". trim($this->raw, "\r\n") ."\n";
    // only the two added line feeds are present - raw was empty, nothing to parse.
    if (!isset($padded[2])) { return; }

    $handlers = $this->settings->handlers->Get($this->handlerGroup);
    $positions = $this->Tokenize($padded, $handlers, $this->defaultTokenClass);
    $this->Divide($padded, $positions);
  }

    // Scans $raw for the tokens of all given handlers and builds the ordered map
    // of positions that Divide() later uses to split the text into child elements
    // and to delete escape tokens.
    //
    //   $raw     - the text being tokenized (also handed to token callbacks by reference).
    //   $tokens  - 0-based list of handlers, each being array(PCRE, class, flags); every
    //              handler becomes one capturing group of the combined regexp, so
    //              handler PCREs must not contain capturing groups of their own.
    //   $defaultTokenClass - class given to the text that follows a closed or single tag.
    //   $offset  - offset in $raw to start matching from (1 skips the "\n" added by Parse()).
    //
    // If a tag is left unclosed at the end the tail is tokenized again (recursively)
    // starting right after the first char of that tag's start token.
    // Throws EUverseWiki if a match can't be mapped to a handler.
    function &Tokenize(&$raw, $tokens, $defaultTokenClass, $offset = 1) {
      // format: position => array('token', 'class', $callbackResult, $endToken)
      //      or position => array(Uwacko_DeletePos, lengthToDelete)
      // $token: null (delete token), other - the token.
      // $pos might have leading '.' to designate single tag's ending.
      // $endToken is for Uwacko_EndTag tokens.
      $positions = array();

        if (!preg_match_all($this->TokenRegExpFor($tokens), $raw, $matches,
                            PREG_SET_ORDER | PREG_OFFSET_CAPTURE, $offset)) {
          EUWikiLastPCRE::ThrowIfPcreFailed();
        } else {
            // format: {depth => [class, posAfterToken, flags, callbackResult, posBeforeToken]}.
            // callbackResult is set for Uwacko_Callback tokens. The deepest item has index 0.
            $stack = array();
            // position at which a token must start to be escaped by the preceding escaper (~):
            $skipTokenIfAtPos = null;
              $skipTokenEscaperLen = null;
            // position after which parsing is enabled again inside a Uwacko_NoTokensForFirstPart tag:
            $compoundEnableParsingPos = null;

          foreach ($matches as &$row) {
            // the last item of a match row is the group that has matched, i.e. the handler's index + 1.
            $tokenIndex = count($row) - 2; // [0] is the full match so we don't count it.

            list($token, $pos) = array_pop($row);
            list(, $class, $flags) = $tokens[$tokenIndex];

              if (self::IsEmptyStr($class)) {
                throw new EUverseWiki('Can\'t find token among defined handlers - this'.
                                      ' usually happens when you forget to use non-capturing'.
                                      ' brackets (?:...) instead of (...) in handler\'s PCRE.');
              }

              if ($skipTokenIfAtPos !== null) {
                if ($pos == $skipTokenIfAtPos and (empty($stack) or
                    ($stack[0][2] & Uwacko_NoTokensInside) == 0 or
                    (($stack[0][2] & Uwacko_NoTokensForFirstPart) != 0 and $compoundEnableParsingPos <= $pos) or
                    $stack[0][0] === $class or
                    $class === Uwacko_OneTokenEscape)) {
                  // removing tilde char that escaped the following tag we've now met.
                  $positions[$pos - $skipTokenEscaperLen] = array(Uwacko_DeletePos, $skipTokenEscaperLen);
                  continue;
                }

                $skipTokenIfAtPos = null;
              }

            if ($flags & Uwacko_Callback) {
              $callbackResult =
                call_user_func_array(array($class, 'FindTokenCallback'),
                                     array($this->doc, &$raw, &$positions, &$stack, &$token, &$pos, &$flags));

              if (!is_array($callbackResult)) {
                if ($callbackResult === Uwacko_SkipThisToken) {
                  continue;
                } elseif ($callbackResult === Uwacko_ForceCloseCurrent) {
                  $flags |= Uwacko_EndTag;
                  // with nothing opened there's no class to close - $class becomes null
                  // as before but without reading a missing stack item.
                  $class = empty($stack) ? null : $stack[0][0];
                }

                $callbackResult = null;
              }
            } else {
              $callbackResult = null;
            }

            // the deepest opened tag is discarded if this token isn't its ending.
            if (!empty($stack) and ($stack[0][2] & Uwacko_DiscardOnDiffToken)) {
              if (($flags & Uwacko_EndTag) == 0 or $class != $stack[0][0]) {
                if (!isset($stack[1]) and ($stack[0][2] & Uwacko_DoesNotDivide) == 0) {
                  array_pop($positions);
                }

                array_shift($stack);
              }
            }

              if (!empty($stack)) {
                if ($flags & Uwacko_EndTag and $class == $stack[0][0]) {
                  if ($flags & Uwacko_DeleteToken) {
                      $tokenLen = strlen($token);
                      $tokStartPos = $stack[0][1] - $tokenLen;

                    // There might have been something between the start and end tags of "" - e.g.:
                    // ""~""""  - tilde remains before the end tag of "". We can't use ksort() because
                    // it will behave strangely on string keys (e.g. '2.' - used for single tags),
                    // sometimes putting them before, sometimes after numeric.
                    $reinsert = array();

                      $posKeys = array_keys($positions);
                      while ($reinsPos = array_pop($posKeys) and $reinsPos > $tokStartPos) {
                        $reinsert[$reinsPos] = array_pop($positions);
                      }

                    $positions[$tokStartPos] = array(Uwacko_DeletePos, $tokenLen);
                    // $reinsert was filled from the end so it's in descending order; it must be put
                    // back in its original (ascending) order because DeleteIn() removes substrings
                    // from last to first and relies on earlier positions staying unshifted.
                    $positions += array_reverse($reinsert, true);
                    $positions[$pos] = array(Uwacko_DeletePos, $tokenLen);
                  }

                  // remembering the ending token on the entry of the tag that's being closed.
                  isset($positions[ $stack[0][4] ]) and $positions[ $stack[0][4] ][3] = $token;
                  array_shift($stack);
                  $class = $defaultTokenClass;
                  $flags &= ~Uwacko_StartTag & ~Uwacko_EndTag & ~Uwacko_DiscardOnDiffToken;
                  $compoundEnableParsingPos = null;
                } elseif (($stack[0][2] & Uwacko_NoTokensInside)
                          and ($flags & Uwacko_EscTagAfterSelf) === 0) {
                  if (!$compoundEnableParsingPos or $pos < $compoundEnableParsingPos) {
                    continue;
                  } else {
                    $compoundEnableParsingPos = null;
                  }
                }
              }

            // positions are only recorded on the top level, i.e. when no tag is opened.
            if (empty($stack) and ($flags & Uwacko_DoesNotDivide) == 0) {
              if (($flags & Uwacko_EndTag) == 0 or ($flags & Uwacko_StartTag)) {
                $positions[$pos] = array($token, $class, $callbackResult, null);

                if ($flags & Uwacko_SingleTag) {
                  $positions['.'.$pos] = array($token, $defaultTokenClass, null, null);
                }
              }
            }

            if ($flags & Uwacko_StartTag) {
              array_unshift( $stack, array($class, $pos + strlen($token), $flags, $callbackResult, $pos) );

              if ($flags & Uwacko_NoTokensForFirstPart) {
                $stack[0][2] |= Uwacko_NoTokensInside;
                $compoundEnableParsingPos = null;

                // parsing is enabled after the first '==' or, if there's none, after the first space.
                $part = substr($raw, $pos, $this->settings->firstTokenPartLookAheadLength);
                  if (($delimPos = strpos($part, Uwacko_CompoundTokenSepar)) !== false) {
                    $compoundEnableParsingPos = $pos + $delimPos + strlen(Uwacko_CompoundTokenSepar);
                  } elseif (($delimPos = strpos($part, ' ')) !== false) {
                    $compoundEnableParsingPos = $pos + $delimPos + 1;
                  }
              }
            } elseif ($flags & Uwacko_EscTagAfterSelf) {
              $skipTokenEscaperLen = strlen($token);
              $skipTokenIfAtPos = $pos + $skipTokenEscaperLen;
            }
          }
          // breaking the reference so later writes to $row can't alter the last match.
          unset($row);

          $reparseFrom = $this->AfterTokenizing($raw, $stack, $positions);
          if ($reparseFrom) {
            // dropping everything recorded at or after the unclosed tag and tokenizing the tail again.
            $posArray = array_keys($positions);
              while (array_pop($posArray) >= $reparseFrom) { array_pop($positions); }
            $positions += $this->Tokenize($raw, $tokens, $defaultTokenClass, $reparseFrom + 1);
          }
        }

      return $positions;
    }

      // Builds one PCRE (modifiers: x, u) out of all handlers, each handler's
      // pattern ($tokens[i][0]) being wrapped into its own capturing group and
      // the groups being joined with '|'.
      function TokenRegExpFor(&$tokens) {
        $alternatives = '';
        foreach ($tokens as $handler) {
          $alternatives .= '('.$handler[0].')|';
        }

        // cutting off the trailing '|' before closing the pattern.
        return substr('/'.$alternatives, 0, -1).'/xu';
      }

      // Called by Tokenize() once all matches are processed to deal with tags that
      // were left opened. Discards unclosed Uwacko_DiscardOnDiffToken tags from the
      // top of $stack; if any tag still remains opened returns item [4] (position
      // before the start token) of the last $stack item so that Tokenize() can
      // reparse from there. Returns nothing (null) when no tag remains opened.
      function AfterTokenizing(&$raw, &$stack, &$positions) {
        while (!empty($stack) and ($stack[0][2] & Uwacko_DiscardOnDiffToken)) {
          $discarded = array_shift($stack);
          $createsNode = ($discarded[2] & Uwacko_DoesNotDivide) === 0;
          // only a top-level dividing tag has an entry in $positions to be removed.
          if (empty($stack) and $createsNode) { array_pop($positions); }
        }

        if (empty($stack)) { return; }

        $isNested = count($stack) > 1;
        if ($isNested or ($stack[0][2] & Uwacko_DoesNotDivide) == 0) {
          // removing trailing deletion entries and then one more entry preceding them.
          while ($tail = self::LastIn($positions) and $tail[0] == Uwacko_DeletePos) {
            array_pop($positions);
          }
          array_pop($positions);
        }

        $unclosed = self::LastIn($stack);
        return $unclosed[4];
      }

  // Splits $raw into child elements according to $positions built by Tokenize()
  // and then parses each child. An entry of $positions is either
  //   array(Uwacko_DeletePos, length)                        - substring to delete, or
  //   array(startToken, class, callbackResult, endToken)     - start of a new child;
  // a key with leading '.' marks the end of a single tag (see Tokenize()).
  // A child's Parse() may return an array of elements to put in place of that child.
  function Divide(&$raw, &$positions) {
    // Note: $this->children MUST be updated on the way in contrast to keeping
    // a local array and then assigning it to $this->children - this is crucial
    // for correct treePositions assignment (otherwise doc tree would be empty
    // when the position is calculated).
    $children = &$this->children;

        $lastPos = 0;
        $last = $children[] = $this->NewElement($this->defaultTokenClass);
        // deletions (array(pos, length)) collected for the child that's currently being filled.
        $toDelete = array();
      foreach ($positions as $pos => $info) {
        if ($info[0] === Uwacko_DeletePos) {
          $toDelete[] = array($pos, $info[1]);
        } else {
          list($startToken, $class, $callbackResult, $endToken) = $info;

            // Regular positions are integer keys, only single tag endings are strings ('.N');
            // checking the type first avoids indexing an integer which raises a notice/warning
            // on PHP 7.4+.
            if (is_string($pos) and $pos[0] === '.') {
              // this is a single tag's end.
              $toDelete = array();
            } else {
              $last->SetRaw( substr($raw, $lastPos, $pos - $lastPos) );
              if ($lastPos === 0) {
                // skipping the leading "\n" added in Parse().
                $last->source = $last->startToken.substr($last->raw, 1).$last->endToken;
              } else {
                $last->source = $last->startToken.$last->raw.$last->endToken;
              }

              // positions are relative to $raw while the child's raw starts at $lastPos.
              self::DeleteIn($last, $toDelete, -1 * $lastPos);
              if ($last->raw === '') { array_pop($children); }

              $lastPos = $pos + strlen($startToken);
            }

          $last = $children[] = $this->NewElement($class);
          $last->startToken = $startToken;
          $last->endToken = $endToken;
          $last->callbackResult = $callbackResult;
        }
      }
        // the remaining tail goes to the last child; its source omits the trailing
        // "\n" (and the leading one if the tail is the entire text) added in Parse().
        $last->SetRaw( substr($raw, $lastPos) );
        $last->source = $last->startToken.substr($last->raw, $lastPos === 0 ? 1 : 0, -1);
        self::DeleteIn($last, $toDelete, -1 * $lastPos);

      $this->CleanChildrenBeforeParsing($children);

    // isset() instead of count() since the array may grow or shrink while iterating.
    for ($i = 0; isset($children[$i]); ++$i) {
      $replaceWith = $children[$i]->Parse();
      if (is_array($replaceWith)) {
        array_splice($children, $i, 1, $replaceWith);
        // continuing after the inserted elements - they're not parsed here.
        $i += count($replaceWith) - 1;
      }
    }
  }

    // Applies pending deletions to $obj->raw if $obj has removeEscInside set.
    // $toDelete is a list of array(position, length); $posDelta is added to each
    // position before deleting. Entries are processed from last to first.
    // $toDelete is always emptied, even when nothing was deleted.
    static function DeleteIn(&$obj, &$toDelete, $posDelta) {
      if ($obj->removeEscInside) {
        $text = &$obj->raw;

        for ($entry = array_pop($toDelete); $entry !== null; $entry = array_pop($toDelete)) {
          self::DeleteSubstrIn($text, $entry[0] + $posDelta, $entry[1]);
        }
      }

      $toDelete = array();
    }

    // Removes the two \n added to $raw in Parse(): one at the start of the first
    // child's raw and one at the end of the last child's raw. A child is dropped
    // from $children if DeleteSubstrIn() returns '' for it (presumably meaning its
    // raw has become empty - verify against the helper), otherwise its originalRaw
    // is set to the cleaned raw.
    function CleanChildrenBeforeParsing(&$children) {
      $headResult = self::DeleteSubstrIn($children[0]->raw, 0);
      if ($headResult !== '') {
        $children[0]->originalRaw = $children[0]->raw;
      } else {
        array_shift($children);
      }

      $tail = self::LastIn($children);
      $tailResult = self::DeleteSubstrIn($tail->raw, -1);
      if ($tailResult !== '') {
        $tail->originalRaw = $tail->raw;
      } else {
        array_pop($children);
      }
    }
}
