/*
ssc (static site checker)
Copyright (c) 2020 Dylan Harris
https://dylanharris.org/

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public Licence as published by
the Free Software Foundation, either version 3 of the Licence,  or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public Licence for more details.

You should have received a copy of the GNU General Public
Licence along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "quote.h"
#include "context.h"
#include "directory.h"
#include "family.h"
#include "field.h"
#include "element.h"
#include "page.h"

// Exchanges every piece of state held by this element with the corresponding
// state of e. Declared noexcept: each step is a member swap or ::std::swap.
void element::swap (element& e) noexcept
{   element_.swap (e.element_);
    // Exchange with e's node. The earlier code swapped node_ with itself,
    // which left both elements holding their original nodes.
    node_.swap (e.node_);
    ::std::swap (examined_, e.examined_);
    ::std::swap (page_, e.page_);
    ::std::swap (parent_, e.parent_);
    sibling_.swap (e.sibling_);
    child_.swap (e.child_);
    mf_.swap (e.mf_);
    name_.swap (e.name_);
    ::std::swap (ids_, e.ids_);
    diagnosis_.swap (e.diagnosis_); }

// Returns the element wrapping this node's first child. The wrapper is built
// on first request and cached in child_; later calls hand back the cached one.
// Callers must check has_child () first (asserted).
element_ptr element::child (const int depth)
{   assert (has_child ());
    if (child_) return child_;
    child_.reset (new element (name_, node_.child (), this, ids_, page_, depth));
    return child_; }

// Returns the element wrapping this node's next sibling. The wrapper is built
// on first request, sharing this element's parent, and cached in sibling_.
// Callers must check has_next () first (asserted).
element_ptr element::next (const int depth)
{   assert (has_next ());
    if (sibling_) return sibling_;
    sibling_.reset (new element (name_, node_.next (), parent_, ids_, page_, depth));
    return sibling_; }

// Stores p as the page pointer used by this element.
void element::set_page (page* p)
{
    page_ = p;
}

// Gives the page a chance to recognise a tag that the parser reported as
// undefined or beyond its own table.
//   tag : the tag id reported for this node
//   n   : passed to node_.report when detailed diagnostics are enabled
// Returns the (possibly replaced) tag id. When the page returns an id at or
// above TAG_FIRST_NEW_ELEM, that id is also installed in element_ and node_.
myhtml_tag_id_t element::check_bespoke_tag (myhtml_tag_id_t tag, const int n)
{   // nothing to consult without a page
    if (page_ == nullptr) return tag;
    // tags inside the parser's own range need no further lookup
    if ((tag != MyHTML_TAG__UNDEF) && (tag < MyHTML_TAG__END_OF_FILE)) return tag;
    ::std::string label = node_.name ();
    if (label.empty ()) return tag;
    if (context.tell (e_debug))
    {   diagnosis_ << "* " << ::std::hex << tag << " (" << label << ")";
        if (context.tell (e_detail)) diagnosis_ << " " << node_.report (n);
        diagnosis_ << "\n"; }
    tag = page_ -> check_bespoke_tag (tag, label, diagnosis_);
    if (tag >= TAG_FIRST_NEW_ELEM)
    {   element_.set_tag (tag);
        node_.set_tag (tag); }
    return tag; }

void element::examine_self (const directory& d, const int depth)
{   if (examined_) return;
    myhtml_tag_id_t tag = check_bespoke_tag (node_.tag (), depth);
    if (context.tell (e_splurge))
        diagnosis_ << ::std::string (depth*STEPSPACES, ' ') << "- " << ::std::hex << node_.tag () << "=" << ::std::hex << tag << "\n";
    bool postprocess = false;
    if (! element_.invalid ())
    {   if (context.links ())
        {   if (element_.has_url ())
                element_.verify_url (d);
            element_.invalid_id (ids_); }
        if (tag == MyHTML_TAG_BASE) examine_base ();
        postprocess = examine_class ();
        if (element_.has < attr_rel > ())
            examine_rel (element_.get_value < attr_rel > ());
        if (element_.has < attr_rev > ())
            examine_rel (element_.get_value < attr_rev > ());
        if (mf_ && element_.has < attr_href > ())
        {   ::std::string href (element_.get_value < attr_href > ());
            if  (mf_ -> allocated (rel_webmention))
                context.webmention (href, tag == MyHTML_TAG_LINK ? wm_link : wm_addr);
            if (mf_ -> allocated (rel_in_reply_to))
                context.note_reply (name_, element_.get_value < attr_id > (), href, node_.content (false, false)); } }
    examine_children (d, depth);
    if (postprocess)
        if (mf_ && mf_ -> allocated (h_entry))
        {   note_reply < in_reply_to_at > ();
            note_reply < like_of_at > ();
            note_reply < repost_of_at > ();
            note_reply < rsvp_at > ();
            note_reply < bookmark_of_at > (); }
    examined_ = true; }

// Advances e to its next sibling when it has one.
// Returns true if e was moved on, false (leaving e untouched) otherwise.
bool element::to_sibling (element_ptr& e, const int depth)
{   const bool more = e -> has_next ();
    if (more) e = e -> next (depth);
    return more; }

void element::examine_children (const directory& d, const int depth)
{   if (has_child ())
    {   element_ptr e = child (depth);
        if (context.tell (e_detail))
        {   ::std::ostringstream tree;
            tree.setf (::std::ios_base::hex, ::std::ios_base::basefield);
            tree << node_.tag () << ":";
            do
            {   tree << " " <<  e -> node_.tag (); }
            while (to_sibling (e, depth));
            ::std::cout << tree.str () << "\n"; }
        do
        {   e -> examine_self (d, depth+1); }
        while (to_sibling (e, depth)); } }

// If no webmention address of at least wm_addr strength has been found yet and
// the mf_entry property PROPERTY is not empty, adopts its value as the mention
// and raises wms to wm_addr. Otherwise leaves both outputs untouched.
template < class PROPERTY > void mf_postprocess_entry (const microformats_ptr& mf, e_wm_status& wms, ::std::string& mention)
{   if (! (wms < wm_addr)) return;
    if (is_microformat_property_empty < mf_entry, PROPERTY > (mf)) return;
    mention = get_microformat_property_value < mf_entry, PROPERTY > (mf);
    wms = wm_addr; }

void element::seek_webmention (::std::string& mention, e_wm_status& wms)
{   myhtml_tag_id_t tag = node_.tag ();
    bool postprocess = false;
    if (! element_.invalid ())
    {   if (element_.has < attr_class > ()) postprocess = examine_class ();
        if (element_.has < attr_rel > ()) examine_rel (element_.get_value < attr_rel > ());
        if (element_.has < attr_href > ())
        {   ::std::string ref = element_.get_value < attr_href > ();
            if  (mf_ && mf_ -> has (rel_webmention))
            {   if (tag == MyHTML_TAG_LINK)
                {   mention = ref; wms = wm_link; return; }
                else if (wms < wm_addr)
                {   mention = ref; wms = wm_addr; } } } }
    for (element_ptr e = child_; e && (wms != wm_link); e = e -> sibling_)
        e -> seek_webmention (mention, wms);
    if (wms != wm_link)
        if (postprocess)
            if (mf_ && mf_ -> has (h_entry))
            {   mf_postprocess_entry < in_reply_to_at > (mf_, wms, mention);
                mf_postprocess_entry < like_of_at > (mf_, wms, mention);
                mf_postprocess_entry < repost_of_at > (mf_, wms, mention);
                mf_postprocess_entry < rsvp_at > (mf_, wms, mention);
                mf_postprocess_entry < bookmark_of_at > (mf_, wms, mention); } }

::std::string element::find_webmention (const int depth)
{   ::std::string mention;
    e_wm_status wms = wm_undefined;
    if (has_child ())
    {   element_ptr e = child (depth);
        do
        {   e -> seek_webmention (mention, wms); }
        while (wms != wm_link && to_sibling (e, depth)); }
    return mention; }

// Climbs the ancestor chain looking for the nearest element whose microformats
// offer a plausible vocabulary for property p.
//   p       : the property being placed
//   starter : element whose parent begins the climb; when null the climb
//             begins at this element's parent
// Returns (ancestor, vocabulary) on success, (nullptr, v_unknown) otherwise.
// The climb stops before any element for which is_top () holds, so such an
// element is never itself inspected.
found_farm element::find_farm (const e_property p, element* starter) const
{   if (is_top ()) return found_farm (nullptr, v_unknown);
    element* ancestor = nullptr;
    if (starter == nullptr) ancestor = parent ();
    else ancestor = starter -> parent ();
    while ((ancestor != nullptr) && ! ancestor -> is_top ())
    {   if (ancestor -> mf_)
        {   const e_vocabulary v = ancestor -> mf_ -> plausible_vocabulary (p);
            if (v != v_unknown) return found_farm (ancestor, v); }
        ancestor = ancestor -> parent (); }
    return found_farm (nullptr, v_unknown); }

// Processes the href of a <base> element (asserted to be one).
//  - empty href: nothing happens
//  - non-local href: warns (if warnings are enabled) and gives up
//  - href with a fragment or query: reports an error (if enabled), ignores it
//  - otherwise: installs the original href text as the context's base
void element::examine_base ()
{   assert (element_.holds_element < base_element > ());
    url u (element_.get_value < attr_href > ());
    if (u.empty ()) return;
    if (! u.is_local ())
    {   if (context.tell (e_warning))
            diagnosis_ << node_.name () << ": " << context.filename ()  << " has offsite base " << quote (u.original ()) << ", abandoning check\n";
        return; }
    if (u.has_component (es_fragment) || u.has_component (es_query))
    {   if (context.tell (e_error))
            diagnosis_ << node_.name () << ": " << context.filename ()  << " ignoring bizarre base " << quote (u.original ()) << "\n";
        return; }
    context.base (u.original ()); }

// Interprets the class attribute as a space-separated list of microformat
// vocabulary and property names.
// Returns true immediately when microformats checking is switched off, false
// when the class attribute is empty, and otherwise true only if at least one
// vocabulary was newly declared or one property value was recorded.
// Each token is tried both as a vocabulary (vocab) and as a property (prop).
bool element::examine_class ()
{   if (! context.microformats ()) return true;
    ::std::string content (element_.get_value < attr_class > ());
    if (content.empty ()) return false;
    vstr_t entries;
    bool res = false;
    // runs of spaces are compressed, so consecutive separators do not yield empty tokens between names
    ::boost::algorithm::split (entries, content, ::boost::algorithm::is_any_of (" "), ::boost::algorithm::token_compress_on);
    // note: each token is copied into x
    for (auto x : entries)
    {   // --- first reading: the token as a vocabulary name ---
        vocab v (x);
        if (! v.invalid ())
        {   if (v.is_rel ())
            // a rel-type name has no business in a class attribute; only warn
            {   if (context.mf_verify () && context.tell (e_warning)) diagnosis_ << node_.name () << ": " << "microformats rel " << quote (v.name ()) << " declared as class\n"; }
            else
            {   if (v.whoopsie ()) diagnosis_ << node_.name () << ": " << v.diagnosis () << "\n";
                // presumably ensures mf_ is allocated before it is dereferenced below -- TODO confirm
                activate_microformats ();
                // the if/else below pairs with the oddly indented inner if: duplicate declaration versus first declaration
                if (! mf_ -> is_relational () && ! v.unknown ())
                        if (mf_ -> is_declared (v.get ()))
                    {   if (context.mf_verify () && context.tell (e_warning)) diagnosis_ << node_.name () << ": " << "ignoring duplicate declaration of " << quote (v.name ()) << "\n"; }
                    else
                    {   if (context.mf_verify () && context.tell (e_comment))
                        {   diagnosis_ << node_.name () << ": " << "microformat vocabulary " << quote (v.name ()) << " found";
                            if (context.tell (e_debug)) diagnosis_ << " (" << v.get () << ")";
                            diagnosis_ << "\n"; }
                        mf_ -> declare (v);
                        mf_ -> validate_element (tag ());
                        res = true; } } }
        // --- second reading: the same token as a property name ---
        prop p (x);
        if (! p.invalid ())
        {   if (p.whoopsie ()) diagnosis_ << node_.name () << ": " << p.diagnosis () << "\n";
            // nearest ancestor offering a plausible vocabulary for this property
            found_farm farm = find_farm (p.get ());
            // element on which the property value ends up being stored, if any
            element* prop_vocab_element = nullptr;
            // case 1: the token is also a known vocabulary for which the property is plausible: store on this element
            // NOTE(review): mf_ is dereferenced here, but activate_microformats () is only called above when
            // v.is_rel () is false; if a rel vocabulary can satisfy is_plausible_field, mf_ may be unset -- confirm
            if (! v.invalid () && ! v.unknown () && is_plausible_field (v.get (), p.get ()))
            {   mf_ -> set_mf_value (v.get (), p.get (), *this);
                if (context.mf_verify () && context.tell (e_debug))
                {   diagnosis_ << node_.name () << ": " << "microformat property " << p.name ();
                    if (context.tell (e_detail)) diagnosis_ << " (" << p.get () << ")";
                    diagnosis_  << " set to " << quote (mf_ -> get_value (v.get (), p.get ())) << "\n"; }
                mf_put_vocab (v, p);
                prop_vocab_element = this;
                res = true; }
            else
            // case 2: otherwise the property belongs to the ancestor found by find_farm, if that is plausible
            {   if (farm.second == v_unknown)
                {   if (context.mf_verify () && context.tell (e_error)) diagnosis_ << node_.name () << ": " << "cannot find vocabulary for " << quote (p.name ()) << "\n"; }
                else if (! is_plausible_field (farm.second, p.get ()))
                {   if (context.mf_verify () && context.tell (e_error)) diagnosis_ << node_.name () << ": " << "cannot find plausible vocabulary for " << quote (p.name ()) << "\n"; }
                else
                {   assert (farm.first -> mf_);
                    farm.first -> mf_ -> set_mf_value (farm.second, p.get (), *this);
                    if (context.mf_verify () && context.tell (e_comment))
                    {   diagnosis_ << node_.name () << ": " << vocab::name (farm.second) << " property " << p.name ();
                        if (context.tell (e_detail)) diagnosis_ << " (" << p.get () << ")";
                        diagnosis_  << " set to " << quote (farm.first -> mf_ -> get_value (farm.second, p.get ())) << "\n"; }
                    prop_vocab_element = farm.first;
                    farm.first -> mf_put_vocab (farm.second, p);
                    res = true; } }
            // case 3: the token is a known vocabulary, an ancestor farm exists and the value was stored somewhere:
            // also offer the property to a farm further up, starting the climb from where the value was stored
            if (! v.invalid () && ! v.unknown () && (farm.second != v_unknown) && (prop_vocab_element != nullptr))
            {   assert (farm.first -> mf_);
                found_farm ancestral_farm = find_farm (p.get (), prop_vocab_element);
                if ((ancestral_farm.second != v_unknown) && (is_plausible_parent (farm.second, ancestral_farm.second, p.get (), diagnosis_)))
                {   assert (ancestral_farm.first -> mf_);
                    // NOTE(review): the value is set under farm.second, yet it is read back and exported below
                    // under ancestral_farm.second -- confirm this asymmetry is intended
                    ancestral_farm.first -> mf_ -> set_mf_value (farm.second, p.get (), *prop_vocab_element);
                    if (context.mf_verify () && context.tell (e_comment))
                    {   diagnosis_ << node_.name () << ": " << "parental " << vocab::name (ancestral_farm.second) << " property " << p.name ();
                        if (context.tell (e_detail)) diagnosis_ << " (" << p.get () << ")";
                        diagnosis_  << " set to " << quote (ancestral_farm.first -> mf_ -> get_value (ancestral_farm.second, p.get ())) << "\n"; }
                    // the EXPORT_* tokens are written adjacently; presumably string-literal macros concatenated into export paths -- TODO confirm
                    ancestral_farm.first -> mf_put_vocab (ancestral_farm.second, p,
                        EXPORT_ITEMPROP EXPORT_STRSEP EXPORT_ITEM EXPORT_STRSEP EXPORT_TYPE,
                        EXPORT_ITEMPROP EXPORT_STRSEP EXPORT_ITEM EXPORT_STRSEP EXPORT_PROPS);
                    res = true; } } } }
    return res; }

bool element::examine_rel (const ::std::string& content)
{   if (! context.microformats ()) return true;
    if (content.empty ()) return false;
    vstr_t entries, ve;
    bool res = false;
    vocab vr;
    prop vp;
    ::boost::algorithm::split (entries, content, ::boost::algorithm::is_any_of (" "), ::boost::algorithm::token_compress_on);
    ve.reserve (entries.size ());
    for (auto x : entries)
    {   vocab r (x);
        if (! r.invalid ())
        {   activate_microformats ();
            if (r.unknown ())
            {   if (r.whoopsie ()) diagnosis_ << node_.name () << ": " << r.diagnosis () << "\n";
                else if (context.mf_verify () && context.tell (e_comment)) diagnosis_ << node_.name () << ": " << "unknown rel type " << quote (x) << " encountered\n"; }
            else if (! r.is_rel ())
            {   if (context.mf_verify () && context.tell (e_warning)) diagnosis_ << node_.name () << ": " << "microformats class " << quote (r.name ()) << " declared as rel\n"; }
            else if (mf_ -> is_declared (r.get ()))
            {   if (context.mf_verify () && context.tell (e_warning)) diagnosis_ << node_.name () << ": " << "ignoring duplicate declaration of " << quote (r.name ()) << "\n"; }
            else
            {   mf_ -> declare (r);
                mf_ -> validate_element (tag ());
                if (context.mf_verify () && context.tell (e_comment))
                {   diagnosis_ << node_.name () << ": " << "rel type " << quote (r.name ());
                    if (context.tell (e_variable)) diagnosis_ << " (" << r.get () << ")";
                    diagnosis_ << " found\n"; }
                prop p (x);
                if (! p.invalid ())
                {   if (p.whoopsie ()) diagnosis_ << node_.name () << ": " << p.diagnosis () << "\n";
                    mf_ -> set_mf_value (r, p.get (), *this);
                    if (ve.empty ()) { vr = r; vp = p; }
                    ve.push_back (x); }
                res = true; } } }
    if (! ve.empty ()) mf_put_rel (vr, vp, ve);
    return res; }


void element::verify_children (const int depth)
{   if (has_child ())
    {   element_ptr e = child (depth);
        do
        {   e -> verify (depth+1); }
        while (to_sibling (e, depth)); } }

// Verifies this element against the id table (which must be set, asserted),
// then verifies its children.
void element::verify (const int depth)
{
    assert (ids_ != nullptr);
    element_.verify (ids_);
    verify_children (depth);
}


// Helper for find_mention_info: xgv fills s with the mf_entry PROPERTY value
// only while s is still empty, and returns a copy of s either way.
template < class PROPERTY > struct fmi
{   static ::std::string xgv (const microformats_ptr& mf, ::std::string& s)
    {   if (! s.empty ()) return s;
        s = get_microformat_property_value < mf_entry, PROPERTY > (mf);
        return s; } };

// Looks for the element whose microformats refer to target and returns that
// element's node content.
//   target   : the URL being mentioned; candidate URLs are tested with target.tismoi
//   text     : passed to node_.content
//   anything : passed to node_.content
// This element is tried first: as an h-entry through its reply-style
// properties (in-reply-to, like-of, repost-of, rsvp, bookmark-of; the first
// non-empty one wins), then through rel in-reply-to. If neither refers to
// target, the already-built children are searched in order and the first
// non-empty result is returned. An empty string means nothing was found.
::std::string element::find_mention_info (const url& target, bool text, bool anything)
{   ::std::string s;
    if (mf_)
    {   if (mf_ -> is_declared (h_entry))
        {   // xgv leaves s alone once it is non-empty, so earlier properties take precedence
            s = fmi < in_reply_to_at > :: xgv (mf_, s);
            s = fmi < like_of_at > :: xgv (mf_, s);
            s = fmi < repost_of_at > :: xgv (mf_, s);
            s = fmi < rsvp_at > :: xgv (mf_, s);
            s = fmi < bookmark_of_at > :: xgv (mf_, s);
            if (! s.empty ()) if (target.tismoi (s))
                return node_.content (text, anything);
            s.clear (); }
        if (mf_ -> is_declared (rel_in_reply_to))
        {   s = get_microformat_property_value < mf_in_reply_to, in_reply_to_rt > (mf_);
            if (! s.empty ()) if (target.tismoi (s))
                return node_.content (text, anything);
            // Not a reference to target: discard it, as the h-entry branch
            // does. Previously the unrelated URL stayed in s, which skipped
            // the search of the children and was returned as if it were the
            // mention's content.
            s.clear (); } }
    for (element_ptr e = child_; e && s.empty (); e = e -> sibling_)
        s = e -> find_mention_info (target, text, anything);
    return s; }

::std::string element::report (const int n)
{   ::std::ostringstream res;
    if ((n == 0) || reportable ())
    {   res << node_.report (n) << element_.report (n);
        if (! diagnosis_.str ().empty ())
        {   ::std::string sol (fyi (n));
            ::std::string diagnosis = diagnosis_.str ();
            diagnosis.erase (diagnosis.end () - 1);
            res << sol << ::boost::replace_all_copy (diagnosis, "\n", ::std::string ("\n") + sol) << "\n"; }
        res << element_.diagnose (n);
        for (element_ptr e = child_; e; e = e -> sibling_)
            res << e -> report (context.tell (e_comment) ? (n + 1) : n); }
        if (mf_) res << mf_ -> report (n) << mf_ -> diagnose (n);
    return res.str (); }

void element::clear ()
{   name_.clear ();
    mf_.reset ();
    for (element_ptr e = child_; e; e = e -> sibling_)
        e -> clear ();
    child_.reset (); }

// Destruction runs clear () so that the subtree is released first.
element::~element ()
{
    clear ();
}

// Exports one microformat property value through the page, when export is on.
//   v        : vocabulary the property belongs to
//   p        : the property (must be valid, asserted)
//   itemtype : export location that receives the vocabulary name
//   itemprop : export location prefix; EXPORT_SEP and the property name are appended
// Nothing is exported when the stored value is empty.
void element::mf_put_vocab (const e_vocabulary v, const prop& p, const ::std::string& itemtype, const ::std::string& itemprop)
{   assert (! p.invalid ());
    if (! context.mf_export ()) return;
    ::std::string val (mf_ -> get_value (v, p.get ()));
    if (val.empty ()) return;
    const ::std::string vs (vocab::name (v));
    if (context.tell (e_debug)) diagnosis_ << "putting " << quote (vs) << " in json at " << itemtype << "\n";
    page_ -> export_item (itemtype, vs);
    ::std::string where (itemprop);
    where += EXPORT_SEP;
    where += p.name ();
    if (context.tell (e_debug)) diagnosis_ << "putting " << quote (val) << " in json at " << where << "\n";
    page_ -> export_item (where, val); }

void element::mf_put_rel (const e_vocabulary v, const prop& p, const vstr_t& rels)
{   assert (! p.invalid ());
    if (context.mf_export ())
    {   ::std::string val (mf_ -> get_value (v, p.get ()));
        if (! val.empty ())
        {   ::std::string hreflang, media, text (element_.content (true, false)), title, type;
            assert (! rels.empty ());
            if (element_.has < attr_hreflang > ()) hreflang = element_.get_value < attr_hreflang > ();
            if (element_.has < attr_media > ()) media = element_.get_value < attr_media > ();
            if (element_.has < attr_title > ()) title = element_.get_value < attr_title > ();
            if (element_.has < attr_type > ()) type = element_.get_value < attr_type > ();
            page_ -> export_rel (val, hreflang, media, rels, text, title, type);
            if (context.tell (e_debug))
                diagnosis_ << "export_rel " << quote (val) << ", " << hreflang << ", " << media << ", " <<
                    quote (rels) << ", " << quote (text) << ", " << quote (title) << ", " << type << "\n"; } } }

// Chooses the text to treat as this element's date value:
//  1. the value attribute, when both value and class attributes are valid;
//  2. otherwise, by tag: datetime for time/ins/del, title for abbr, value for
//     data/input, each only when that attribute is valid;
//  3. failing all of those, element_.content (true, false).
::std::string element::find_date_value () const
{   if (element_.valid < attr_value > () && element_.valid < attr_class > ())
        return element_.get_value < attr_value > ();
    switch (tag ())
    {   case MyHTML_TAG_ABBR :
            if (element_.valid < attr_title > ())
                return element_.get_value < attr_title > ();
            break;
        case MyHTML_TAG_INPUT :
        case TAG_DATA :
            if (element_.valid < attr_value > ())
                return element_.get_value < attr_value > ();
            break;
        case MyHTML_TAG_DEL :
        case MyHTML_TAG_INS :
        case MyHTML_TAG_TIME :
            if (element_.valid < attr_datetime > ())
                return element_.get_value < attr_datetime > ();
            break;
        default :
            break; }
    return element_.content (true, false); }

// Chooses the text to treat as this element's plain-text value:
//  1. the value attribute, when both value and class attributes are valid;
//  2. otherwise, by tag: title for abbr/link, value for data/input, alt for
//     img/area, each only when that attribute is valid;
//  3. failing all of those, element_.content (true, false).
::std::string element::find_text_value () const
{   if (element_.valid < attr_value > () && element_.valid < attr_class > ())
        return element_.get_value < attr_value > ();
    switch (tag ())
    {   case MyHTML_TAG_AREA :
        case MyHTML_TAG_IMG :
            if (element_.valid < attr_alt > ())
                return element_.get_value < attr_alt > ();
            break;
        case MyHTML_TAG_INPUT :
        case TAG_DATA :
            if (element_.valid < attr_value > ())
                return element_.get_value < attr_value > ();
            break;
        case MyHTML_TAG_LINK :
        case MyHTML_TAG_ABBR :
            if (element_.valid < attr_title > ())
                return element_.get_value < attr_title > ();
            break;
        default :
            break; }
    return element_.content (true, false); }

::std::string element::find_url_value () const
{   switch (tag ())
    {   case MyHTML_TAG_A :
        case MyHTML_TAG_AREA :
        case MyHTML_TAG_LINK :
            if (! element_.invalid < attr_href > ()) return element_.get_value < attr_href > ();
            break;
        case MyHTML_TAG_AUDIO :
        case MyHTML_TAG_SOURCE :
        case MyHTML_TAG_IFRAME :
        case MyHTML_TAG_IMG :
            if (element_.valid < attr_src > ()) return element_.get_value < attr_src > ();
            break;
        case MyHTML_TAG_VIDEO :
            if (element_.valid < attr_src > ()) return element_.get_value < attr_src > ();
            if (element_.valid < attr_poster > ()) return element_.get_value < attr_poster > ();
            break;
        case MyHTML_TAG_OBJECT :
            if (element_.valid < attr_data > ()) return element_.get_value < attr_data > ();
            break; }
    if (element_.valid < attr_value > () && element_.valid < attr_class > ()) return element_.get_value < attr_value > ();
    switch (tag ())
    {   case MyHTML_TAG_ABBR :
            if (element_.valid < attr_title > ()) return element_.get_value < attr_title > ();
            break;
        case TAG_DATA :
        case MyHTML_TAG_INPUT :
            if (element_.valid < attr_value > ()) return element_.get_value < attr_value > ();
            break; }
    return element_.content (true, false); }

// The HTML value of an element is always element_.content (true, false).
::std::string element::find_html_value () const
{
    return element_.content (true, false);
}
