// Parser.cpp
//
// Comparing C++ and Perl

#include "Parser.h"
#include <boost/regex.hpp>
#include <iostream>
#include <sstream>
#include <fstream>
#include "MyException.h"
#include "Bibitem.h"

using namespace std;

// Strip surrounding whitespace from a single-token string.
//
// s is rewritten in place only when, apart from leading and trailing
// whitespace, it consists entirely of word characters, digits, '-' and '/'.
// Returns true when the pattern matched (s now holds the bare token) and
// false otherwise, in which case s is left untouched -- in particular a
// value with internal whitespace is not trimmed.
static bool clean(string& s){
    static const boost::regex single_token("\\s*([-\\/\\d\\w]+)\\s*");

    boost::cmatch found;
    if (!boost::regex_match(s.c_str(), found, single_token)){
        return false;
    }

    // copy the captured token out before overwriting s, because the match
    // refers to s's own character buffer
    const string token(found[1].first, found[1].second);
    s = token;
    return true;
}

// Check that every entry carries a "technology" field whose value is
// accepted by technologies.entry_num().
//
// Throws MyException at the first entry that has no technology field or
// whose technology value is not recognised.
void Parser::canonicalize_and_verify_entries(
    vector <Bibitem> &entries,
    Technologies const &technologies)
{
    typedef vector <Bibitem>::iterator EntryIter;

    for (EntryIter entry = entries.begin(); entry != entries.end(); ++entry){

        // the technology field must be present ...
        string tech;
        if (!entry->get("technology", tech)){
            throw MyException(
                "entry has no technology information");
        }

        // ... and must be one the technology list knows about; the
        // position reported by entry_num is not needed here
        int position;
        if (!technologies.entry_num(tech, position)){
            throw MyException(
                "entry has unknown technology information: " +
                tech);
        }
    }
}

// Finish the record held in `current` and append a copy of it to `entries`.
//
// Records without a "title" or without an "authors" field are silently
// skipped.  Otherwise the record gains two fields before it is stored:
//   "entry_number" - 1 + the number of entries stored so far
//   "surname"      - surname of the first author in the comma separated
//                    "authors" field
//
// Throws MyException when the first author does not have the form
// "<anything>. <surname>" so that no surname can be extracted.
void Parser::save(
    vector <Bibitem > &entries,
    Bibitem &current)
{
    // only store entries which actually have a title
    // and authors
    string title;
    if (!current.get("title", title)) return;
    string authors;
    if (!current.get("authors", authors)) return;

    // store the entry number
    ostringstream oss;
    oss << 1+entries.size();
    current.put("entry_number", oss.str());

    // get the first author: everything before the first comma, or the
    // whole field when there is no comma.  A plain find() needs no regex
    // and stays well defined for an empty authors string, where
    // dereferencing a token iterator would not be.
    const string first_author(authors, 0, authors.find(','));

    // save the surname of the first author
    static const boost::regex name(".*\\. ([-'\\w]+)\\s*");
    boost::cmatch matches;
    if (boost::regex_match(first_author.c_str(), matches, name)){
        string surname( matches[1].first, 
                        matches[1].second );
        current.put("surname", surname);
    }
    else{
        throw MyException("Surname can't be extracted: "+first_author+" for "+title);
    }

    // copy the bibitem to the entries vector
    entries.push_back( current );
}

// Add the text in s to the value stored under `tag` in `current`.
//
// Blank (empty or all-whitespace) text is ignored.  Otherwise s is passed
// through clean() and then stored: if the tag already holds a non-empty
// value the new text is joined to it with a newline, else it becomes the
// value.  Note that s itself is modified and ends up holding the value
// that was stored.
void Parser::append( 
    string &s, 
    string const &tag, 
    Bibitem &current)
{
    // nothing to do for blank text
    static const boost::regex blank("\\s*");
    if (boost::regex_match(s, blank)){
        return;
    }

    // strip surrounding whitespace where clean() is able to
    clean(s);

    // join onto whatever the tag already holds
    string previous;
    current.get(tag, previous);
    if (!previous.empty()){
        s = previous + "\n" + s;
    }
    current.put(tag, s);
}

// Read bibliography records from standard input into `entries`.
//
// Every input line is split on "@@".  The text before the first "@@" is
// appended to the value of the most recently seen tag.  Each later piece
// must be either
//   - "tag [=] value", where tag is known to `tags`: the value is appended
//     to that tag and it becomes the current tag, or
//   - a run of '-' characters: the current record is handed to save() and
//     then cleared.
//
// Throws MyException("no such tag: ...") for an unknown tag and
// MyException("bad format: ...") for a piece that does not look like a tag
// at all; exceptions raised by save() propagate as well.
//
// NOTE(review): save() is only called when a separator is seen, so a final
// record that is not followed by a separator line is not stored -- confirm
// that the data format guarantees a trailing separator.
void Parser::parse_data_file(
    Tags const &tags,
    vector <Bibitem> &entries
    )
{
    static const boost::regex delim("@@");
    static const boost::regex poss_tag("([-\\w]+)\\s*=?\\s*(.*)");
    static const boost::regex separator("-+");

    string current_tag;
    Bibitem current;

    string s;
    // Loop on the result of getline itself.  Testing the stream before
    // reading runs the body one extra time after the last read has failed;
    // when the input does not end in a newline s then still holds the last
    // line, which would be processed a second time.
    while (getline(cin, s)){
        boost::sregex_token_iterator i( s.begin(), s.end(), delim, -1 ), end;
        bool leading_piece = true;
        for (; i != end; ++i){
            string dat(*i);

            // text in front of the first "@@" continues the current tag
            if (leading_piece){
                leading_piece = false;
                append( dat, current_tag, current );
                continue;
            }

            boost::cmatch matches;
            if (!boost::regex_match(dat.c_str(), matches, poss_tag)){
                throw MyException("bad format: "+dat);
            }

            string tag( matches[1].first, 
                        matches[1].second );
            string val( matches[2].first, 
                        matches[2].second );
            if (tags.exists(tag)){
                current_tag = tag;
                append ( val, current_tag, current );
            }
            else if (boost::regex_match(dat, separator)){
                // we are just moving on to the next item
                save( entries, current );
                current.clear();
            }
            else{
                throw MyException("no such tag: "+tag);
            }
        }
    }
}

// Write the table of contents to standard output: an HTML list with one
// link per technology (anchor = technology name, link text = the heading
// looked up in `headings`) followed by a link to the bibliography.
void Parser::print_technologies_table(
    Technologies const &technologies,
    Headings const &headings)
{
    cout << "<ul class='arrow'>" << endl;

    for (int idx = 0; idx < technologies.size(); ++idx){
        // open the link first, then look up and print its text
        cout << "<li><a href='#" << technologies[idx] << "'>";

        string link_text;
        headings.get(technologies[idx], link_text);
        cout << link_text << "</a>" << endl;
    }

    // the bibliography always comes last
    cout << "<li><a href='#Bibliography'>Bibliography</a>" << endl
         << "</ul>" << endl << endl;
}

// Write all entries to standard output as HTML, grouped under technology
// and year headings.
//
// The entries vector is sorted in place using the technology ordering.
// A technology heading (with a named anchor) is printed before the first
// entry and whenever the technology changes; a year heading is printed
// before the first entry and whenever the year changes.  For each entry
// the authors, the title (linked when a "title_link" field exists) and the
// description -- or, failing that, the abstract -- are printed.
//
// An empty entries vector produces no output.  A missing or non-numeric
// "year" field is treated as year 0.
void Parser::print_data(
    vector <Bibitem> &entries,
    Technologies const &technologies,
    Headings const &headings)
{
    // sort the entries with the technology ordering
    Bibitem::set_tech(&technologies);
    Bibitem::set_order(BO_TECHNOLOGY_ORDER);
    sort(entries.begin(), entries.end());

    string present_tech;
    int present_year_int = 0;

    typedef vector <Bibitem>::size_type Index;
    for (Index i = 0; i < entries.size(); ++i){
        Bibitem &entry = entries[i];

        // the first entry always gets both headings; handling it inside
        // the loop means nothing is read from an empty vector
        const bool first_entry = (i == 0);

        // if we have a new technology then print it
        string new_tech;
        entry.get("technology", new_tech);
        if (first_entry || new_tech != present_tech) {
            present_tech = new_tech;
            cout << "<a name='" << present_tech << "'></a>" << endl;
            string tech_heading;
            headings.get(present_tech, tech_heading);
            cout << "<p class='hdr1'>" 
                 << tech_heading << "</p>" << endl << endl;
        }

        // if we have a new year then print it; the value starts at 0 so
        // that a failed extraction never leaves it indeterminate
        string new_year;
        int new_year_int = 0;
        entry.get("year", new_year);
        istringstream new_year_stream(new_year);
        new_year_stream >> new_year_int;
        if (first_entry || new_year_int != present_year_int){
            present_year_int = new_year_int;
            cout << "<p class='hdr2'>" 
                 << present_year_int << "</p>" << endl << endl;
        }

        // print the author information
        string authors;
        if (entry.get("authors", authors)){
            cout << "<strong>" << authors << "</strong>" << endl;
        }

        // print the title information (linked if necessary)
        string title, title_link;
        string pre, post;
        if (entry.get("title_link", title_link)){
            pre = "<a href='"+title_link+"'>";
            post = "</a>";
        }
        entry.get("title", title);
        cout << pre << title << post << endl;

        // print the description if present, otherwise the abstract
        string description;
        if (entry.get("description", description)){
            cout << description << endl;
        }
        else{
            string abstract;
            if (entry.get("abstract", abstract)){
                cout << abstract << endl;
            }
        }
    
        // print a paragraph break between entries
        cout << "<p>" << endl;

    }

}
// Write the bibliography section to standard output as HTML.
//
// The entries vector is sorted in place using the author ordering.  Each
// entry becomes one paragraph: authors, title, then whichever of the
// conference, workshop, ePrint and technical-report details are present,
// and finally the year.
void Parser::print_bib(
    vector <Bibitem> &entries)
{
    // sort the entries with the author ordering
    Bibitem::set_order(BO_AUTHOR_ORDER);
    sort(entries.begin(), entries.end());

    cout << "<a name='Bibliography'></a>" << endl;
    cout << "<p class='hdr1'>Bibliography</p>" << endl;

    typedef vector <Bibitem>::iterator EntryIter;
    for (EntryIter entry = entries.begin(); entry != entries.end(); ++entry){

        // the year is used by several of the parts below
        string year;
        entry->get("year", year);

        // author names
        cout << "<p>" << endl;
        string authors;
        if (entry->get("authors", authors)){
            cout << authors << ", ";
        }

        // paper title
        string title;
        if (entry->get("title", title)){
            cout << title << ", ";
        }

        // venue details shared by the conference and workshop parts
        string location, series, vol, publisher;

        // conference information
        string conference;
        if (entry->get("conference", conference)){
            const bool crypto_series =
                conference == "CRYPTO" ||
                conference == "EUROCRYPT" ||
                conference == "ASIACRYPT";
            if (crypto_series){
                cout << "Advances in Cryptology - "
                     << conference
                     << " " << year << ", ";
            }
            else{
                cout << conference << " " << year << ", ";
            }

            if (entry->get("location", location)){
                cout << location << ", ";
            }

            // series and volume are only printed as a pair
            if (entry->get("series", series) &&
                entry->get("vol", vol)){
                cout << series << " vol. " << vol << ", ";
            }

            if (entry->get("publisher", publisher)){
                cout << publisher << ", ";
            }
        }

        // workshop information
        string workshop;
        if (entry->get("workshop", workshop)){
            cout << workshop << " " << year << ", ";

            if (entry->get("location", location)){
                cout << location << ", ";
            }

            // there may not be workshop proceedings (in which case
            // this info won't be printed)
            if (entry->get("series", series) &&
                entry->get("vol", vol)){
                cout << series << " vol. " << vol << ", ";
            }

            if (entry->get("publisher", publisher)){
                cout << publisher << ", ";
            }
        }

        // ePrint submissions
        string eprint_ref;
        if (entry->get("eprint_ref", eprint_ref)){
            cout << "Cryptology ePrint Archive, ref. " << eprint_ref << ", ";
        }

        // technical reports
        string tech_report;
        if (entry->get("tech_report", tech_report)){
            cout << "NTRU Technical Report #" << tech_report << ", ";
        }

        // always end with the year
        cout << year << "." << endl;
    }

}

// Wednesday, April 29, 2009 (1)