/*
ssc (static site checker)
Copyright (c) 2020 Dylan Harris
https://dylanharris.org/

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public Licence as published by
the Free Software Foundation, either version 3 of the Licence,  or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public Licence for more details.

You should have received a copy of the GNU General Public
Licence along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "common.h"
#include "directory.h"
#include "element.h"
#include "page.h"
#include "url.h"
#include "url_sanitise.h"
#include "element_myhtml.h"

// Creates the MyHTML instance (html_) and the parse tree (tree_) bound to it.
// Each step is only attempted if the previous one succeeded, so on failure
// the members are left as far as setup got. A tree that was created but
// could not be initialised is destroyed and tree_ is put back to nullptr.
// myhtml_init is asked for 1 thread and a queue size of 4096 (third and
// fourth arguments in the MyHTML API).
void page::init ()
{   html_ = myhtml_create ();
    if (html_ != nullptr)
        if (myhtml_init (html_, MyHTML_OPTIONS_DEFAULT, 1, 4096) == MyHTML_STATUS_OK)
        {   tree_ = myhtml_tree_create ();
            if ((tree_ != nullptr) && (myhtml_tree_init (tree_, html_) != MyHTML_STATUS_OK))
            {   myhtml_tree_destroy (tree_);
                tree_ = nullptr; } } }

// Builds a page called name from the HTML text in content, interpreted with
// the given encoding. The parser is set up by init () and content is parsed
// straight away; if parsing fails, everything is released again by zap ().
page::page (const ::std::string& name, const ::std::string& content, const myencoding_t encoding) : tree_ (nullptr), html_ (nullptr), name_ (name)
{   init ();
    const bool parsed = parse (content, encoding);
    if (! parsed) zap (); }

// Releases everything the page holds and leaves tree_ and html_ as nullptr.
// The order is: the element document, then the parse tree, then the MyHTML
// instance the tree was initialised against. Calling it again is harmless
// because each handle is checked before use and nulled afterwards.
void page::zap ()
{   document_.reset ();
    if (tree_)
    {   myhtml_tree_clean (tree_);
        myhtml_tree_destroy (tree_);
        tree_ = nullptr; }
    if (html_)
    {   myhtml_clean (html_);
        myhtml_destroy (html_);
        html_ = nullptr; } }

// Parses content into tree_ using the given encoding.
// Returns false if the page reports itself invalid () or if myhtml_parse
// does not return MyHTML_STATUS_OK; true otherwise. When the context asks
// for e_structure output, rpt_structure (tree_) is written to ::std::cout.
bool page::parse (const ::std::string& content, const myencoding_t encoding)
{   if (invalid ()) return false;
    const auto status = myhtml_parse (tree_, encoding, content.c_str (), content.length ());
    if (status != MyHTML_STATUS_OK) return false;
    if (context.tell (e_structure)) ::std::cout << rpt_structure (tree_);
    return true; }

// Builds the element document from the parsed tree, then examines and
// verifies it. Does nothing if the document already exists, so only the
// first call has any effect.
// NOTE(review): tree_ is used without a check; presumably callers make sure
// the page is valid before calling this - confirm.
void page::examine (const directory& d, const int depth)
{   if (document_) return;
    document_.reset (new element (name_, myhtml_tree_get_document (tree_), nullptr, &ids_, this, depth));
    document_ -> examine_self (d, depth);
    document_ -> verify (depth); }

// Forwards to the document's find_webmention (depth) and returns its result.
// The document must already exist (see examine ()); this is asserted.
::std::string page::find_webmention (const int depth) const
{   assert (document_);
    ::std::string found (document_ -> find_webmention (depth));
    return found; }

// Forwards to the document's find_mention_info (u, text, anything) and
// returns its result. The document must already exist (see examine ());
// this is asserted.
::std::string page::find_mention_info (const url& u, bool text, bool anything)
{   assert (document_);
    ::std::string info (document_ -> find_mention_info (u, text, anything));
    return info; }

// Returns a textual report on the page.
// When the context asks for e_variable output, the report starts with the
// html_ and tree_ handles, each shown as "null" when it is nullptr. The
// document's own report (0) follows if a document exists; before examine ()
// or after zap () there is none, and that part is simply omitted.
::std::string page::report () const
{   ::std::ostringstream res;
    if (context.tell (e_variable))
    {   res << "html_ ";
        if (html_ == nullptr) res << "null";
        else res << html_;
        res << "\ntree_ ";
        // each handle is tested on its own: html_ can be live while tree_ is not
        if (tree_ == nullptr) res << "null";
        else res << tree_;
        res << "\n"; }
    if (document_) res << document_ -> report (0);
    return res.str (); }

// Returns a copy of url with the recognised percent-escapes replaced by the
// characters they encode.
//
// pc   index at which to start, normally the position of the first '%'.
//      ::std::string::npos, or any index at or beyond the end of url, means
//      there is nothing to do and url is returned unchanged.
// url  the text to decode; it is not modified.
//
// Only the escapes in the table below are decoded, and their hex digits are
// matched in either case ("%2f" and "%2F" both give '/'). Anything else,
// including a truncated escape at the end of the string, is left alone.
// Scanning resumes after each decoded character, so the result is never
// decoded a second time: "%2520" gives "%20", not " ".
::std::string unescape (size_t pc, const ::std::string& url)
{   struct escape_t
    {   char encoding [4];
        char ch; };
    // built once, not on every call
    static constexpr escape_t encoded [] =
    {   { "%20", ' ' },
        { "%23", '#' },
        { "%24", '$' },
        { "%25", '%' },
        { "%26", '&' },
        { "%2F", '/' },
        { "%3A", ':' },
        { "%3B", ';' },
        { "%3C", '<' },
        { "%3D", '=' },
        { "%3E", '>' },
        { "%3F", '?' },
        { "%40", '@' },
        { "%5B", '[' },
        { "%5C", '\\'},
        { "%5D", ']' },
        { "%5E", '^' },
        { "%60", '`' },     // 0x60 is the backtick; the apostrophe would be %27
        { "%7B", '{' },
        { "%7C", '|' },
        { "%7D", '}' },
        { "%7E", '~' } };
    constexpr size_t escape_len = 3;
    // ASCII-only upper-casing: percent-encoding uses ASCII hex digits, so the
    // locale must not influence the match
    const auto upper = [] (const char c) -> char
    {   return ((c >= 'a') && (c <= 'z')) ? static_cast < char > (c - 'a' + 'A') : c; };
    ::std::string res (url);
    while (pc < res.length ())
    {   // a full three character escape must fit before the end of the string
        if (res.length () - pc >= escape_len)
            for (const escape_t& e : encoded)
                if ((res [pc] == e.encoding [0]) &&
                    (upper (res [pc + 1]) == e.encoding [1]) &&
                    (upper (res [pc + 2]) == e.encoding [2]))
                {   res.replace (pc, escape_len, 1, e.ch);
                    break; }
        // step past the current position so that a decoded '%' is not rescanned
        pc = res.find ('%', pc + 1); }
    return res; }

// Works out which tag id to use for an element called name that the parser
// reported as tag.
// tag is returned untouched when it is a defined id below
// MyHTML_TAG__END_OF_FILE, or when name is empty. Otherwise name is looked
// up as an elem: if elem recognises it, the id from get () is returned; if
// not, the result of elem::add_tag (name, ss) is returned.
myhtml_tag_id_t page::check_bespoke_tag (myhtml_tag_id_t tag, const ::std::string& name, ::std::ostringstream& ss)
{   const bool standard = (tag != MyHTML_TAG__UNDEF) && (tag < MyHTML_TAG__END_OF_FILE);
    if (standard || name.empty ()) return tag;
    elem candidate (name);
    if (candidate.unknown ()) return elem::add_tag (name, ss);
    return candidate.get (); }

// Returns url after it has been through sanitise (), cut off just before the
// first '?', '#' or '=' if it contains one.
::std::string get_page_url (const ::std::string& url)
{   ::std::string page_url (sanitise (url));
    const ::std::string::size_type cut = page_url.find_first_of ("?#=");
    if (cut != ::std::string::npos) page_url.erase (cut);
    return page_url; }
