Ben Bacarisse
7/13/2011 1:50:00 AM
Ceriousmall <divadsmall@gmail.com> writes:
> feel free to review this code I look forward to the
> comments............
>
> /* 2011/07 Ceriousmall. . . .
> * this program checks a C source file for rudimentary syntax errors
> such as
> * unbalanced parentheses, brackets and braces
> * quotes both double and single
> * escape sequences and comments
> */
>
> #include <stdio.h>
>
> #define OUT 0
> #define IN 1
> #define MAXLINE 1025 /* maximum input line size */
It's always more satisfying to avoid limits like this.
> int at_start, char_const_state, quoted_string_state, comment_state;
> int line_num, referance_num, parentheses_detected, brackets_detected;
>
> /* assigns the character string to line[] */
> int gotline(char line[], int max_line_length)
> {
> int ch, address;
>
> for (address = 1; address < max_line_length && (ch = getchar()) !=
> EOF && ch != '\n'; ++address)
> line[address] = ch;
>
> if (ch == '\n')
> line[address++] = ch;
>
> line[address] = '\0';
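This can write outside the line array: once address reaches
max_line_length (a full-length line, or a newline stored in the last
slot), line[address] is one past the end. What's wrong with line[0]?
You seem not to want to use it.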
> return ch;
> }
>
> /* check for basic syntax errors */
> void syntaxcheck(char line[], int open_brace_address_map[], int
> closed_brace_address_map[])
> {
> int address;
>
> line_num += 1;
>
> for (address = 1; line[address] != '\0'; ++address)
> if (line[address] == '\'' && char_const_state == OUT &&
> quoted_string_state == OUT && comment_state == OUT)
> char_const_state = IN;
>
> else if (line[address] == '\'' && line[address+1] == '\'' &&
> char_const_state == IN) {
What's this case for?
> char_const_state = OUT;
> ++address;
> }
> else if (line[address] == '\'' && char_const_state == IN)
> char_const_state = OUT;
You know my view of this sort of state variable compared with using
plain Boolean values. You obviously disagree so I won't make the point
again!
> else if (line[address] == '"' && char_const_state == OUT &&
> quoted_string_state == OUT && comment_state == OUT)
> quoted_string_state = IN;
>
> else if (line[address] == '"' && quoted_string_state == IN)
> quoted_string_state = OUT;
>
> else if (line[address] == '/' && line[address+1] == '*' &&
> quoted_string_state == OUT && comment_state == OUT) {
> comment_state = IN;
> ++address;
> }
> else if (line[address] == '*' && line[address+1] == '/' &&
> comment_state == IN) {
> comment_state = OUT;
> ++address;
> }
> else if (line[address] == '(' && char_const_state == OUT &&
> quoted_string_state == OUT && comment_state == OUT)
> ++parentheses_detected;
>
> else if (line[address] == ')' && char_const_state == OUT &&
> quoted_string_state == OUT && comment_state == OUT)
> --parentheses_detected;
>
> else if (line[address] == '[' && char_const_state == OUT &&
> quoted_string_state == OUT && comment_state == OUT)
> ++brackets_detected;
>
> else if (line[address] == ']' && char_const_state == OUT &&
> quoted_string_state == OUT && comment_state == OUT)
> --brackets_detected;
>
> else if (line[address] == '{' && char_const_state == OUT &&
> quoted_string_state == OUT && comment_state == OUT) {
> open_brace_address_map[line_num] = address;
> referance_num = line_num+1;
> }
> else if (line[address] == '}' && char_const_state == OUT &&
> quoted_string_state == OUT && comment_state == OUT) {
> closed_brace_address_map[line_num] = address;
>
> while (open_brace_address_map[referance_num] == 0)
> --referance_num;
>
> if (open_brace_address_map[referance_num] > 0) {
> open_brace_address_map[referance_num] = 0;
> closed_brace_address_map[line_num] = 0;
> }
> }
>
> if (at_start == EOF) {
> if (char_const_state == IN)
> printf("syntax error...... fragmented character constant.\n");
>
> if (quoted_string_state == IN)
> printf("syntax error...... character string missing closing
> argument.\n");
>
> if (comment_state == IN)
> printf("syntax error...... expected '*/' token after identifier.
> \n");
>
> if (parentheses_detected != 0)
> printf("syntax error...... unbalanced parentheses detected.\n");
>
> if (brackets_detected != 0)
> printf("syntax error...... unbalanced brackets detected.\n");
>
> for (line_num = 1; line_num < MAXLINE; ++line_num) {
> if (closed_brace_address_map[line_num] > 0) {
> printf("syntax error......line(%d),", line_num);
> printf(" col(%d), expected identifier before '}' token.\n",
> closed_brace_address_map[line_num]);
> }
> if (open_brace_address_map[line_num] > 0) {
> printf("syntax error......line(%d),", line_num);
> printf(" col(%d), closing argument missing, expected '}'.
> \n", open_brace_address_map[line_num]);
> }
> }
> }
> }
>
> /* time to execute the entire construct */
> int main(void)
> {
> char line[MAXLINE];
> int open_brace_address_map[MAXLINE],
> closed_brace_address_map[MAXLINE];
>
> at_start = char_const_state = quoted_string_state = comment_state =
> OUT;
> referance_num = parentheses_detected = brackets_detected = 0;
>
> for (line_num = 1; line_num < MAXLINE; ++line_num) {
> open_brace_address_map[line_num] = 0;
> closed_brace_address_map[line_num] = 0;
> }
> line_num = referance_num;
> open_brace_address_map[line_num] = closed_brace_address_map[line_num]
> = -1;
>
> while (at_start != EOF) {
> at_start = gotline(line, MAXLINE);
> syntaxcheck(line, open_brace_address_map,
> closed_brace_address_map);
> }
> return 0;
> }
I don't understand what the brace_address_map arrays are for. Maybe a
comment about them would help.
Cases to consider:
(a) C can have continuation lines.
(b) array[(x]); might be described as unbalanced parentheses.
(c) Similarly, you might want to check for )( and ][.
(d) This is valid C:
#define OPEN {
int main(void) { return 0; }
and fails whereas this is wrong and passes:
#define OPEN {
#define CLOSE }
int main(void) OPEN return 0;
I suspect you will have to rule out any use of the pre-processor because
of all the tricks it can play. Another is:
#define STR(x) #x
STR(})
(e) Technically, there are trigraphs and digraphs to consider as well.
There is no shame in ignoring these.
--
Ben.