.NET Main函数参数解释过程和特殊规则

    技术2022-05-11  17

    最近正在准备一个小型开发工具的发布工作(具体的内容请容我先卖个关子,等发布之后我会详细给大家介绍的)。在使用时不经意间发现,使用 .NET 开发的命令行工具对引号和反斜杠 \ 有着和一般命令行程序不同的解释规则。举例来说,如果你在命令行下输入:   C:\> tool "C:\Program Files\"

    实际上传入的参数是 C:\Program Files" 。其实这里不仔细看可能发现不了问题。在原来的命令行中,第一个双引号代表一个参数的开始/结束,而后面的 \" 因为 .NET 的解释规则不同,代表实际的双引号,而非参数的开始/结束,因此最后的结果会多出一个双引号,并且缺少一个反斜杠。

    内部,CLR 使用 CommandLineToArgvW 来分析程序的命令行并分离出各个参数,这个函数有着特殊的解释规则:

    1. 2n 个反斜杠后面跟一个双引号,代表 n 个反斜杠(该双引号作为参数的开始/结束标记)
    2. 2n+1 个反斜杠后面跟一个双引号,代表 n 个反斜杠加一个 "
    3. n 个反斜杠后面不跟双引号,直接代表 n 个反斜杠

    这个规则比较绕,用例子的方式来解释就是:

    命令行参数                     实际参数
    "C:\Program Files\"            C:\Program Files"
    "C:\Program Files\\"           C:\Program Files\
    "C:\Program Files\\\"          C:\Program Files\"

    因此,正确的方式是第二个,也就是 "C:\Program Files\\" 。

      事实上,CLR 内部并没有直接调用 CommandLineToArgvW ,而是自己实现了一个有着同等功能的函数 SegmentCommandLine 。在 Rotor 的源代码中可以找到它的实现,位于 sscli20\clr\src\utilcode\util.cpp 。CLR 的主函数 _CorExeMain 在执行主函数之前会调用 CorCommandLine::SetArgvW ,这个函数会调用 SegmentCommandLine 来分析命令行(经过简化):

    // Set argvw from command line
    /* static */

    HRESULT CorCommandLine::SetArgvW(LPCWSTR lpCommandLine)

    {  

        HRESULT hr = S_OK;

    if(!m_ArgvW) {

        // 分析命令行

            m_ArgvW = SegmentCommandLine(lpCommandLine, &m_NumArgs);

     

            // CLR特有的命令行处理,主要是和ClickOnce有关的

            if (m_ArgvW)

                hr = ParseCor();

            else

                hr = E_OUTOFMEMORY;

        }

     

        return hr;

    }     真正在执行Main 主函数的时候,ClassLoader::RunMain 函数则会调用CorCommandLine::GetArgvW 获得之前分析得到的参数列表,并创建对应的托管String 数组并传递给Main (经过简化): /* static */

    // Simplified excerpt of the routine that invokes the managed entry point.
    //
    // It fetches the argument vector from CorCommandLine::GetArgvW, copies every
    // entry from index numSkipArgs onward into a newly allocated managed string
    // array, and calls the method described by pFD with that array as its single
    // argument. The value returned by the call is stored through piRetVal as an
    // INT32.
    //
    // NOTE(review): wzArgs, cCommandArgs, StrArgArray, arg and hr have no visible
    // declarations, and stringArgs is never used here; presumably those parts were
    // dropped when the code was simplified for the article.
    HRESULT ClassLoader::RunMain(MethodDesc *pFD ,

                                 short numSkipArgs,                              INT32 *piRetVal,

                                 PTRARRAYREF *stringArgs /*=NULL*/)

    {  

        // Fetch the native argument vector and its length.
        wzArgs = CorCommandLine::GetArgvW(&cCommandArgs);

     

        // Allocate a managed array sized for the arguments that are not skipped

        StrArgArray = (PTRARRAYREF) AllocateObjectArray((cCommandArgs - numSkipArgs), g_pStringClass);

     

        // Create a managed string for each remaining argument and store it in the array

        for( arg = numSkipArgs; arg < cCommandArgs; arg++) {

            STRINGREF sref = COMString::NewString(wzArgs[arg]);

            StrArgArray->SetAt(arg-numSkipArgs, (OBJECTREF) sref);

        }

      MethodDescCallSite threadStart(pFD); // Prepare to call the entry point that the MethodDesc refers to

        ARG_SLOT stackVar = ObjToArgSlot(StrArgArray); // Convert the array reference into the call's argument slot

     

    *piRetVal = (INT32)threadStart.Call_RetArgSlot(&stackVar); // Invoke the entry point and store its result

     

        return hr;

    }   而最关键的SegmentCommandLine 函数代码则如下:   //---------------------------------------------------------------------

    // Splits a command line into argc/argv lists, using the VC7 parsing rules.

    //

    // This functions interface mimics the CommandLineToArgvW api.

    // // If function fails, returns NULL. //

    // If function suceeds, call delete [] on return pointer when done.

    // //---------------------------------------------------------------------

    LPWSTR *SegmentCommandLine(LPCWSTR lpCmdLine, DWORD *pNumArgs)

    {

        STATIC_CONTRACT_NOTHROW;

        STATIC_CONTRACT_GC_NOTRIGGER;

        STATIC_CONTRACT_FAULT;

       

        *pNumArgs = 0;

     

        int nch = (int)wcslen(lpCmdLine);

     

        // Calculate the worstcase storage requirement. (One pointer for

        // each argument, plus storage for the arguments themselves.)

        int cbAlloc = (nch+1)*sizeof(LPWSTR) + sizeof(WCHAR)*(nch + 1);

        LPWSTR pAlloc = new (nothrow) WCHAR[cbAlloc / sizeof(WCHAR)];

        if (!pAlloc)

            return NULL;

     

        LPWSTR *argv = (LPWSTR*) pAlloc; // We store the argv pointers in the first halt

        LPWSTR pdst = (LPWSTR)( ((BYTE*)pAlloc) + sizeof(LPWSTR)*(nch+1) ); // A running pointer to second half to store arguments

        LPCWSTR psrc = lpCmdLine;

        WCHAR   c;

        BOOL    inquote;

        BOOL    copychar;

        int     numslash;

     

        // First, parse the program name (argv[0]). Argv[0] is parsed under

        // special rules. Anything up to the first whitespace outside a quoted

        // subtring is accepted. Backslashes are treated as normal characters.

        argv[ (*pNumArgs)++ ] = pdst;

        inquote = FALSE;

        do {

            if (*psrc == L'"' )

            {

                inquote = !inquote;             c = *psrc++;             continue;

            }

            *pdst++ = *psrc;

     

            c = *psrc++;

     

        } while ( (c != L'/0' && (inquote || (c != L' ' && c != L'/t'))) );

     

        if ( c == L'/0' ) {

            psrc--;

        } else {

            *(pdst-1) = L'/0';

        }

     

        inquote = FALSE;

         

        /* loop on each argument */

        for(;;)

        {

            if ( *psrc )

            {

                while (*psrc == L' ' || *psrc == L'/t')

                {                 ++psrc;             }

            }

     

            if (*psrc == L'/0')

                break;              /* end of args */  

            /* scan an argument */

            argv[ (*pNumArgs)++ ] = pdst;

     

            /* loop through scanning one argument */

            for (;;)

            {

                copychar = 1;

                /* Rules: 2N backslashes + " ==> N backslashes and begin/end quote

                   2N+1 backslashes + " ==> N backslashes + literal "

                   N backslashes ==> N backslashes */

                numslash = 0;             while (*psrc == L'//')             {

                    /* count number of backslashes for use below */

                    ++psrc;                 ++numslash;             }             if (*psrc == L'"')             {

                    /* if 2N backslashes before, start/end quote, otherwise

                       copy literally */                 if (numslash % 2 == 0)                 {                     if (inquote)                     {

                            if (psrc[1] == L'"')

                            {                             psrc++;    /* Double quote inside quoted string */                         }                         else                         {

                                /* skip first quote char and copy second */

                                copychar = 0;                         }                     }                     else                     {                         copychar = 0;       /* don't copy quote */                     }                     inquote = !inquote;                 }                 numslash /= 2;          /* divide numslash by two */             }

       

                /* copy slashes */             while (numslash--)             {                 *pdst++ = L'//';             }

       

                /* if at end of arg, break loop */

                if (*psrc == L'/0' || (!inquote && (*psrc == L' ' || *psrc == L'/t')))

                    break;

       

                /* copy character into argument */

                if (copychar)             {                 *pdst++ = *psrc;             }             ++psrc;

            }

     

            /* null-terminate the argument */

     

            *pdst++ = L'/0';          /* terminate string */

        }

     

        /* We put one last argument in -- a null ptr */

        argv[ (*pNumArgs) ] = NULL;

     

        _ASSERTE((BYTE*)pdst <= (BYTE*)pAlloc + cbAlloc);

        return argv;

    } 有关CLR 执行Main 函数执行过程的更多有关内容我会在下篇Rotor 源码研究中详细解释,敬请关注。

    Trackback: http://tb.blog.csdn.net/TrackBack.aspx?PostId=1852691


    最新回复(0)