tags:

views:

273

answers:

3

Hello all :)

I've got a large (read: nightmare) method which has grown over the years to support my project's ever growing list of commandline arguments. I mean several pages of readme docs for brief blurbs per argument.

As I've added each feature, I've simply "registered" a way of handling that argument by adding a few lines to that method.

However, that method is now unsightly, bug prone, and difficult to understand. Here's an example of the shorter of the two methods currently handling this:

//All double dash arguments modify global options of the program,
//such as --all --debug --timeout etc.
//
//Interprets the current token (curToken) as a word argument. The text after
//the first two characters of the token is compared case-insensitively against
//each known option name, and the matching branch updates globalOptions.
//
//results: only touched by the --files branch, which appends a new
//         filesRegexPlaceHolder to it.
//Throws eMsg when the encoding suffix or the whole argument is not recognised.
//
//NOTE(review): the branches are tested in order and several use prefix
//matches, so reordering them changes which branch a token lands in.
//NOTE(review): _T("...") and L"..." literals are mixed in calls to the _tcs*
//functions; presumably the project is only built as UNICODE -- confirm.
void consoleParser::wordArgParse(std::vector<criterion *> *results)
{
    //Skip the first two characters of the token (the leading "--").
    TCHAR const *compareCurWordArg = curToken.c_str()+2;
    if (!_tcsicmp(compareCurWordArg,_T("all")))
    {
        globalOptions::showall = TRUE;
    } else if (!_tcsnicmp(compareCurWordArg,_T("custom"),6))
    {
        //--custom with an attached display string. An optional ':' may sit at
        //index 9; the substr calls then skip one more character and drop the
        //token's final character (presumably the surrounding quotes -- confirm).
        //NOTE(review): curToken[9] is read without checking the token length.
        if (curToken[9] == L':')
        {
            globalOptions::display = curToken.substr(10,curToken.length()-11);
        } else
        {
            globalOptions::display = curToken.substr(9,curToken.length()-10);
        }
    } else if (*compareCurWordArg == L'c' || *compareCurWordArg == L'C')
    {
        //Short form of the branch above: any remaining token whose option text
        //starts with 'c'/'C' ends up here, so this test must stay after the
        //"custom" test. Same optional ':' and same character trimming.
        //NOTE(review): curToken[3] is read without checking the token length.
        if (curToken[3] == L':')
        {
            globalOptions::display = curToken.substr(5,curToken.length()-6);
        } else
        {
            globalOptions::display = curToken.substr(4,curToken.length()-5);
        }
    } else if (!_tcsicmp(compareCurWordArg,_T("debug")))
    {
        globalOptions::debug = TRUE;
    } else if (!_tcsicmp(compareCurWordArg,L"expand"))
    {
        //Note: --expand switches expandRegex OFF.
        globalOptions::expandRegex = false;
    } else if (!_tcsicmp(compareCurWordArg,L"fileLook"))
    {
        //Preset multi-line display format string.
        globalOptions::display = L"---- #f ----#nCompany: #d#nFile Description: #e#nFile Version: #g"
        L"#nProduct Name: #i#nCopyright: #j#nOriginal file name: #k#nFile Size: #u#nCreated Time: #c"
        L"#nModified Time: #m#nAccessed Time: #a#nMD5: #5#nSHA1: #1";
    } else if (!_tcsicmp(compareCurWordArg,_T("peinfo")))
    {
        //Preset display format string.
        globalOptions::display = _T("[#p] #f");
    } else if (!_tcsicmp(compareCurWordArg,L"enable-filesystem-redirector-64"))
    {
        //Same effect as the --fs32 branch further down.
        globalOptions::disable64Redirector = false;
    } else if (!_tcsnicmp(compareCurWordArg,_T("encoding"),8))
    {
        //Performance enhancement -- encoding compare only done once.
        //After the shared "encoding" prefix, only the suffix is compared.
        compareCurWordArg += 8;
        if (!_tcsicmp(compareCurWordArg,_T("acp")))
        {
            globalOptions::encoding = globalOptions::ENCODING_TYPE_ACP;
        } else if (!_tcsicmp(compareCurWordArg,_T("oem")))
        {
            globalOptions::encoding = globalOptions::ENCODING_TYPE_OEM;
        } else if (!_tcsicmp(compareCurWordArg,_T("utf8")))
        {
            globalOptions::encoding = globalOptions::ENCODING_TYPE_UTF8;
        } else if (!_tcsicmp(compareCurWordArg,_T("utf16")))
        {
            globalOptions::encoding = globalOptions::ENCODING_TYPE_UTF16;
        } else
        {
            throw eMsg(L"Unrecognised encoding word argument!\r\nValid choices are --encodingACP --encodingOEM --encodingUTF8 and --encodingUTF16. Terminate.");
        }
    } else if (!_tcsnicmp(compareCurWordArg,L"files",5))
    {
        //--files[:]<path> -- the rest of the token (after an optional ':') is
        //the path; '"' characters are trimmed from both ends before loadFiles.
        compareCurWordArg += 5;
        if (*compareCurWordArg == L':') compareCurWordArg++;
        std::wstring filePath(compareCurWordArg);
        //NOTE(review): two separate placeholder objects are allocated here, one
        //for globalOptions::regexes and one for results -- confirm that this is
        //intended and who is responsible for deleting each.
        globalOptions::regexes.insert(globalOptions::regexes.end(), new filesRegexPlaceHolder);
        results->insert(results->end(),new filesRegexPlaceHolder);
        boost::algorithm::trim_if(filePath,std::bind2nd(std::equal_to<wchar_t>(),L'"'));
        loadFiles(filePath);
    } else if (!_tcsicmp(compareCurWordArg,_T("full")))
    {
        globalOptions::fullPath = TRUE;
    } else if (!_tcsicmp(compareCurWordArg,_T("fs32")))
    {
        //Same effect as --enable-filesystem-redirector-64 above.
        globalOptions::disable64Redirector = false;
    } else if (!_tcsicmp(compareCurWordArg,_T("long")))
    {
        //Preset display format string; also turns the summary on.
        globalOptions::display = _T("#t #s #m  #f");
        globalOptions::summary = TRUE;
    } else if (!_tcsnicmp(compareCurWordArg,_T("limit"),5))
    {
        //--limit[:]<n> -- n is read as a base-10 number; a result of 0 only
        //produces the warning below, it is not rejected.
        compareCurWordArg += 5;
        if (*compareCurWordArg == _T(':'))
            compareCurWordArg++;
        globalOptions::lineLimit = _tcstoui64(compareCurWordArg,NULL,10);
        if (!globalOptions::lineLimit)
        {
            std::wcerr << eMsg(L"Warning: You are limiting to infinity lines. Check one of your --limit options!\r\n");
        }
    } else if (!_tcsicmp(compareCurWordArg,_T("short")))
    {
        //Preset display format string.
        globalOptions::display = _T("#8");
    } else if (!_tcsicmp(compareCurWordArg,_T("summary")))
    {
        globalOptions::summary = TRUE;
    } else if (!_tcsicmp(compareCurWordArg,_T("norecursion")))
    {
        globalOptions::noSubDirs = TRUE;
    } else if (!_tcsnicmp(compareCurWordArg,_T("timeout"),7))
    {
        //--timeout[:]<n> -- n is read as a base-10 number; a result of 0 only
        //produces the warning below.
        compareCurWordArg += 7;
        if (*compareCurWordArg == _T(':'))
            compareCurWordArg++;
        globalOptions::timeout = _tcstoul(compareCurWordArg,NULL,10);
        if (!globalOptions::timeout)
        {
            std::wcerr << eMsg(L"Warning: You are limiting to infinite time. Check one of your --timeout options!\r\n");
        }
    } else if (!_tcsnicmp(compareCurWordArg,_T("tx"),2))
    {
        //--tx[:]<n> -- writes the same globalOptions::timeout as --timeout and
        //uses the same parsing and warning; only the prefix length differs.
        compareCurWordArg += 2;
        if (*compareCurWordArg == _T(':'))
            compareCurWordArg++;
        globalOptions::timeout = _tcstoul(compareCurWordArg,NULL,10);
        if (!globalOptions::timeout)
        {
            std::wcerr << eMsg(L"Warning: You are limiting to infinite time. Check one of your --timeout options!\r\n");
        }
    } else
    {
        //No branch matched the option text.
        throw eMsg(L"Could not understand word argument! Ensure all of your directives are spelled correctly. Terminate.");
    }
}

I'd post the long one but it's over 500 lines.

Are there better ways of handling this particular problem or should I just leave it as a long method?

EDIT: I'm not looking for a tokenizing library -- I already did the dirty work on that. I'm curious if it would make sense to make stub methods out of the larger dirty method.

Billy3

+5  A: 

I'm sure that there's an equivalent of getopt(3) function for Windows. Here's the first hit from Google - Pete Wilson. Or you can look into Boost Program Options for a decent C++ library.

Nikolai N Fetissov
Would the boost library (I'm already using boost) support reading the following as a single argument? [ -files"C:\Documents and Settings\User\Desktop\InFile.txt" ] Note how the token is not quoted, and does contain spaces. The quotes start inside the token itself.
Billy ONeal
Try it, it's really simple to use.
Nikolai N Fetissov
Tried it... it doesn't work in my current scenario. The project implements a recursive-descent parser on the command line. For example: `progname C:\Windows\* AND -tf OR *.dll OR *.exe` prints all files in Windows that are files (not folders), or are .dll, or are .exe. The Boost library is designed around options coming in pairs -- it's not complex enough for my application. I'm sorry.
Billy ONeal
OK, I didn't get your requirements from the original question - you were talking about options, not a language. On the other hand, recursive-descent parsing is not all that hard, but it really pays off to define (or at least write down) your grammar. I prefer to write a proper (even if simple) lexer and parser and build the AST for anything like that.
Nikolai N Fetissov
+2  A: 

What you're needing is a command line option parser library to take care of the messy details of processing the commandline arguments.

I'm not sure which one is the best for C++ since I'm a C# developer who uses CSharpOptParse...but the concept should be the same so hopefully knowing what to look for will point you in the right direction.

mezoid
Not sure I can do that, because my application supports some pretty bizarre command lines. For example, this is perfectly valid: -files"C:\Documents and Settings\User\Desktop\InFile.txt" Note how the quotes are handled in a non-standard manner and the whole thing is treated as one token. My question, I guess, is more of a structural one.
Billy ONeal
A: 

Hi all, I wrote this little helper for working with command lines. I also updated it to work with the funky --file'thing' form and to split it as the questioner needs. To make it use another character type, just replace the char type with the one you are using. This is a fully working example you can paste into a main.cpp and run. The code does proper escaping and quote grouping, and accepts : and = as well as ' and " as name/value splitters for args, so you can write --flag:1 or -file"c:\test". Do note that space is used as the option splitter. Using it in code would look something like this:

// Parse the raw argument string once, then query individual options by name.
optparse opt(argstring);
// Numeric option: the second argument is the default returned when the option is absent.
g_someint = strtoul(opt.get("--debuglevel","0"),0,0);
// Presence test: with no default given, get() returns 0 when the option is absent.
g_somebool = opt.get("--flag")!=0;
// String option with a default value.
g_somestring = opt.get("--file","default.txt");

To answer the question: you can see that this makes your argument-processing code so simple that you really don't need to modularize it. It's readable and maintainable.

#include <string.h>
#include <stdio.h>  

// Minimal command-line string parser.
//
// The constructor copies the argument string into a private buffer and splits
// it in place into a singly linked list of `opt` entries (head: `first`).
//   * Tokens are separated by spaces.
//   * Inside a token, the first ':' or '=' -- or the first quote character
//     (' or ") that is not the token's first character -- separates the option
//     name from its value.
//   * Text between matching quote characters is kept together, spaces included.
//   * A backslash makes the parser skip the next character, so an escaped
//     quote, space or separator is not acted upon. Backslashes are NOT removed
//     from the stored text.
// Names and values point into the private buffer; they stay valid only for the
// lifetime of the optparse object.
struct optparse{
    // args: the command line to parse; a null pointer is treated as "".
    // len:  number of characters to read from args; 0 means "up to the
    //       terminating NUL".
    optparse(const char *args, size_t len = 0) : buf(0), first(0) {
        if(!args) args = "";
        if(!len) len = strlen(args);
        buf = new char[len+1];
        memcpy(buf, args, len);
        buf[len] = 0;

        opt *last = 0;          // tail of the list built so far
        char *cur = buf;        // character being examined
        char *tokStart = buf;   // first character of the current token
        char *sep = 0;          // name/value separator of the current token, if any
        char quote = 0;         // quote character currently open, 0 when none
        bool escaped = false;   // true when the previous character was an unescaped backslash
        do{
            if(*cur == '\\'){
                escaped = !escaped;
            }else if(escaped){
                escaped = false;            // escaped character: take it literally
            }else if(quote){
                if(*cur == quote) quote = 0;
            }else if(*cur == '"' || *cur == '\''){
                // A quote in the middle of a token also acts as the separator.
                if(tokStart < cur && !sep) sep = cur;
                quote = *cur;
            }else if(!sep && (*cur == ':' || *cur == '=')){
                sep = cur;
            }else if(*cur == ' '){
                if(tokStart < cur) last = new opt(last, &first, tokStart, cur, sep);
                tokStart = cur + 1;
                sep = 0;
            }
            if(*cur) cur++;
            // Reaching the end of the buffer closes whatever token is still open.
            if(!*cur && tokStart < cur) last = new opt(last, &first, tokStart, cur, sep);
        }while(*cur);

        // NUL-terminate names and values only now: doing it inside the loop
        // above would make the scan stop at the first token boundary.
        for(opt *i = first; i; i = i->next){
            *(i->ne) = 0;
            *(i->ve) = 0;
        }
    }

    ~optparse(){
        delete[] buf;               // allocated with new[], so it needs delete[]
        while(first){
            opt *following = first->next;
            delete first;
            first = following;
        }
    }

    // Returns the value of the first option whose name equals `name`, compared
    // case-insensitively over the whole name (so "--debug" does not match
    // "--debuglevel"). An option given without a value yields "". Returns
    // `def` when no such option exists or when `name` is null.
    const char *get( const char *name, const char *def= 0){
        if(!name) return def;
        for(opt *i = first; i; i = i->next)
            if(equalsIgnoreCase(i->name, name)) return i->value;
        return def;
    }

    // One parsed option; links itself into the list on construction.
    struct opt{
        // last/first: current tail of the list and the address of the list head.
        // s, e: start and one-past-the-end of the token inside the buffer.
        // v: the name/value separator inside the token, or 0 when there is none.
        opt( opt *last, opt **first, char *s, char *e, char *v){
            if(!*first) *first = this;
            if(last) last->next = this;
            // A value opened by a quote loses its trailing quote as well.
            if(v && (*v=='\'' || *v=='"') && (*(e-1)=='\'' || *(e-1) == '"')) e--;
            next = 0;
            name = s;
            // Without a separator the value points at the token's own end, which
            // reads as "" once the owner has written the terminators.
            value = v ? v+1 : e;
            ne = v ? v : e;
            ve = e;
        }
        // name/value: start of the name and of the value.
        // ne/ve: where the owner writes the NUL that ends the name / the value.
        char *name, *value, *ne, *ve;
        opt *next;
    };
    char *buf;      // private copy of the argument string, cut up in place
    opt *first;     // head of the option list, 0 when nothing was parsed

private:
    // The object owns buf and the list, so copying it would free both twice.
    // Declared but not defined to forbid copies.
    optparse(const optparse &);
    optparse &operator=(const optparse &);

    // Locale-independent lower-casing of ASCII letters.
    static char lowerAscii(char ch){
        return (ch >= 'A' && ch <= 'Z') ? char(ch - 'A' + 'a') : ch;
    }

    // True when both strings have the same length and are equal ignoring ASCII case.
    static bool equalsIgnoreCase(const char *a, const char *b){
        while(*a && *b){
            if(lowerAscii(*a) != lowerAscii(*b)) return false;
            ++a;
            ++b;
        }
        return *a == *b;
    }
};

int main(){  

    const char *v, *test ="--debug:1 -file'c:\\something' --odd=10";
    optparse opts(test);  

    if(v = opts.get("--debug")){
       printf("debug flag value is %s\n",v);
    }  

    for(optparse::opt *i=opts.first;i;i=i->next){
        printf("name: %s value: %s\n",i->name,i->value);
    }  
}

The parser is very easily tweaked to support different types of argument processing. For instance if you replace

// Token-splitting step from the parser loop's `*c==' '` branch: a space ends
// the current token (if it is non-empty) and starts a new one.
if(b<c)last = new opt(last,&first,b,c,v); 
b = c+1, v = 0;

with

// Variant of the token-splitting step: when the current token starts with '-'
// and the character after the space is not '-', the space is recorded as the
// name/value separator (unless one is already set) and the token continues.
if(*b=='-' && *(c+1)!='-')v = v?v:c;
else{
   // Otherwise the space ends the token exactly as before.
   if(b<c)last = new opt(last,&first,b,c,v); 
   b = c+1, v = 0;
}

You will have added the feature of joining space-split arguments as a 'value', like: -debug 1 or --files a.txt b.txt c.txt. Also, if you dislike the : as a split character (it might be bothersome in Windows apps), just remove the ==':' check.