lexical scanner of string literal token in lcc

    技术2022-05-19  18

    1. string literal definition in c language
        1> "hello world"        // ok
        2> L"hello world"       // ok (wide characters, e.g. for representing Chinese or Japanese)
        3> "i " "love " "you"   // ok, string concatenation is applied
        4> "i " "love "
           "you"                // ok, string concatenation is applied across lines
        5> ""                   // ok, empty string
        6> "hello
           world"               // error (newline inside the literal)
        7> "hello \
           tworld"              // error of c definition, but compiles successfully in lcc & gcc [1]

     

    2. call relationship

        expression    // expr.c, see primary(void)

                |---> STRING_LITERAL_DFA   //  gettok()

                                  |--->  scanner        //  scon()

                                                |--->  get escape sequence   // backslash()

                                                |--->  copy string literal into buffer   // cput(), or wcput()

                                                |--->  storage buffer for string literal  // char cbuf[BUFSIZE+1], or wchar wcbuf[BUFSIZE+1]

     

    3. comment on source code

    // comments on scanner of string literal, in lex.c

    813 static void *scon(int q, void *put(int c, void *cl), void *cl) { 814     int n = 0, nbad = 0;              // n counts how many characters in string literal 815                                                    // q is the enclosing character, for example '/"'(double quotes) 816     do { 817         cp++; 818         while (*cp != q) { 819             int c; 820             if (map[*cp]&NEWLINE) { 821                 if (cp < limit)             // like 6> invalid occurence of literal string definition, goto error 822                     break; 823                 cp++; 824                 nextline(); 825                 if (cp == limit)          // reach EOF, but without matching the enclosing character, goto error 826                     break; 827                 continue; 828             } 829             c = *cp++; 830             if (c == '//') { 831                 if (map[*cp]&NEWLINE) { 832                     if (cp < limit)       // like 7> invalide occurence of literal string definition, goto error 833                         break; 834                     cp++; 835                     nextline(); 836                 } 837                 if (limit - cp < MAXTOKEN) 838                     fillbuf(); 839                 c = backslash(q);      // deal with escape sequence 840             } else if (c < 0 || c > 255 || map[c] == 0)     // ? not sure of the use 841                 nbad++; 842             if (n++ < BUFSIZE) 843                 cl = put(c, cl);            // stores string literals in cbuf[BUFSIZE+1] or wcbuf[BUFSIZE+1] 844         } 845         if (*cp == q) 846             cp++; 847         else 848             error("missing %c/n", q); 849     } while (q == '"' && getchr() == '"');   // there is an '/"' after the literal string, so apply string concetation 850     cl = put(0, cl); 851     if (n >= BUFSIZE) 852         error("%s literal too long/n", q == '"' ? 
"string" : "character"); 853     if (Aflag >= 2 && q == '"' && n > 509) 854         warning("more than 509 characters in a string literal/n"); 855     if (Aflag >= 2 && nbad > 0) 856         warning("%s literal contains non-portable characters/n", 857             q == '"' ? "string" : "character"); 858     return cl; 859 }

     

    // comments on string literal DFA, in lex.c

    156 int gettok(void) {

     ........

    370         case '"': { 371             char *s = scon(*--cp, cput, cbuf);                       // s points to cbuf[] or wcbuf[] 372             tval.type = array(chartype, s - cbuf, 0);             // !! array(widechar, s - wcbuf, 0) for wide string literal 373             tval.u.c.v.p = cbuf;                                                // !! no string copy (1) 374             tsym = &tval; 375             return SCON; 376             }

     

    // comments on literal string expression, in expr.c

    339 static Tree primary(void) { 340     Tree p; 341 342     assert(t != '('); 343     switch (t) { 344     case ICON:

    ....

    348     case SCON: if (ischar(tsym->type->type))     // !!! for ASCII character, tsym->type is a "char" 349             tsym->u.c.v.p = stringn(tsym->u.c.v.p, tsym->type->size);    // !!! string copy here (2) 350            else                                                               // !!! wide string character, tsym->type is a "widechar " 351             tsym->u.c.v.p = memcpy(allocate(tsym->type->size, PERM), tsym->u.c.v.p, tsym->type->    size); 352            tsym = constant(tsym->type, tsym->u.c.v); 353            if (tsym->u.c.loc == NULL) 354             tsym->u.c.loc = genident(STATIC, tsym->type, GLOBAL); 355            p = idtree(tsym->u.c.loc); break;

    [1] invalid definition in c, but valid in c++; no error is reported because after the c preprocessor runs, it becomes "hello \tworld"


    最新回复(0)