본문 바로가기

System Programming

시스템 프로그래밍 프로젝트 #7 최종 (Assembler in C)

문제 :

지금까지의 프로젝트를 참고하여 2 pass assembler를 만들면 됩니다.

먼저 어셈블러(Assembler)란?

하드웨어가 직접 이해하여 실행하는 기계어는 일반적으로 비트 열 또는 16진수로 표현되기 때문에 인간이 이해하기 어렵습니다. 그래서 인간이 이해하기 쉽도록 기계어와 거의 일대일로 대응하는 기호로 표현한 언어가 어셈블리 언어이며, 어셈블리 언어를 기계어로 번역하는 프로그램을 어셈블러, 번역하는 과정을 어셈블이라고 합니다.

 

어셈블러의 역할을 그림으로 간단하게 나타내 보면 다음과 같습니다.

 

이 글에서 구현할 2 패스 어셈블러의 알고리즘을 보겠습니다.

pass 1 :

 

pass 2 :

 

입력 파일 :

input.txt
0.00MB

 

실행 결과 :

 

출력 파일 :

 

코드 :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
 
#define MAX 100     /* number of entries in symtab[] */
#define XR  0x8000  /* value added to the address field of an indexed (",X") operand */

/* Pass 1: processes one source line; the return value is a status code. */
int pass1(char *bp);
/* Pass 2: processes one line of the pass-1 listing and emits object code. */
int pass2(char *bp, FILE *Sample_o);
/* Writes the result of pass 1 for the current line to the listing file. */
int fprint(int res, FILE *sample_1st);
 
/*
 * Operation table: every mnemonic and directive the assembler recognises.
 *
 * For machine instructions `len` is the value pass 2 prints as the two-digit
 * opcode of the object code (the numbers below are written in decimal, e.g.
 * STA = 12 = 0x0C).  For BYTE/WORD/RESW/RESB the value looks like a unit size
 * in bytes; it is not read by pass1/pass2 as shown -- NOTE(review): confirm.
 */
struct  OPTAB {
    char name[8];   /* mnemonic, NUL-terminated */
    int len;        /* opcode value (see above) */
} optab[] = {
    {"LDA",    0}, {"STA",   12}, {"TIX",   44}, {"STL",   20},
    {"JSUB",  72}, {"COMP",  40}, {"JEQ",   48}, {"J",     60},
    {"RSUB",  76}, {"BYTE",   1}, {"WORD",   3}, {"RESW",   3},
    {"RESB",   1}, {"TD",   224}, {"RD",   216}, {"LDX",    4},
    {"STCH",  84}, {"JLT",   56}, {"STX",   16}, {"WD",   220},
    {"LDCH",  80}
};
 
/*
 * One record per source line accepted by pass 1; the array doubles as the
 * symbol table (label -> address).
 */
struct TABLE {
    char lable[10];     /* label column; stays empty when the line has no label */
    char opcode[10];    /* opcode / directive column */
    char oprand[10];    /* operand column */
    int symloc;         /* address of the label (location counter at this line) */
    int object;         /* not referenced by the code shown here */
} symtab[MAX];
 
/* Shared assembler state.  Each definition needs its terminating ';'. */
int p1_start;        /* program start address, set by pass1 from the START operand */
long locctr = 0;     /* location counter (LOCCTR); holds the program length after END */
long p2_locctr = 0;  /* pass 2: address of the line / text record being emitted */
int flag = 0;        /* status returned by pass1/pass2: error, stop-loop, skip-line, ... */
int flag2;           /* tells whether the opcode is a word defined in OPTAB */
int j = 0;           /* number of entries successfully added to SYMTAB */
int cnt = 0;         /* listing line number, normally +5 per line (5, 10, 15, ...) */
int res = 0;         /* flag used to leave the read loop while writing the listing */
int flag3 = 0;       /* 1 when a blank must be written in the label column */
int p2_start = 0;    /* pass 2: start address; 0 means the header was not processed yet */
int l = 0, T = 10;   /* l: pass-2 counter that selects text-record breaks; T: unused here */
int a = 1, b = 11;   /* pass-2 indices into symtab (current line / record-length lookup) */
 
int main() {
    int i;
    FILE *fp;
    FILE *sample_1st;
    FILE *sample_o;
    char buf[80];
    char *token;
    if( (fp = fopen("input.txt""r")) == NULL) { // 입력 소스파일 오픈 
        printf("input.txt not found...\n"); exit(1);
    }
    if( (sample_1st = fopen("Sample.txt""w+")) == NULL) { // 출력 Sample파일 오픈
        printf("Sample.txt not open...\n"); exit(1);
    }
    if( (sample_o = fopen("Sample_o.txt""w+")) == NULL){ // 출력 Sample.o파일 오픈
        printf("Sample.txt not open...\n"); exit(1);
    }
    
    // pass 1
    
    while(fgets(buf, sizeof(buf), fp) != NULL){ 
        
        flag = pass1(buf);
        res = fprint(res, sample_1st);
        if(res == 4)
            break;
    }
    fclose(fp);
    
    rewind(sample_1st); // sample_1st의 file pointer를 0(시작점)으로 이동
    
    /*
    void rewind(FILE *stream);
        : 파일의 현재 위치를 0(시작점)으로 이동
    */
    
    while(fgets(buf, sizeof(buf), sample_1st) != NULL){ // Sample.txt 파일 내용 출력 
        printf("%s",buf);
    }
    printf("\ntotal length : %X", locctr);
    printf("\n\n");
    
    printf("\n---------Symbol Table---------\n\n");
    printf("\tSymbol\tValue\n\n");
    
    for(i=1; i<MAX; i++){
        if(!strlen(symtab[i].lable) < 1 )
            printf("\t%s\t%X\n",symtab[i].lable, symtab[i].symloc);
    }
    printf("\n\n");
 
    // pass 2    
    
    rewind(sample_1st); // sample_1st의 file pointer를 0(시작점)으로 이동 
        
    printf("---------Object Code---------\n\n");
     
    while(fgets(buf, sizeof(buf), sample_1st) != NULL){ // Sample.txt 파일을 한줄씩 읽어옴
        flag = pass2(buf, sample_o);
        if(flag == 1)
            break;
    }
    
    fclose(sample_1st);
    fclose(sample_o);
    
    return 0
}
 
int pass2(char *bp, FILE *Sample_o){
    char test;
    char *tmp_token;
    int i,k,n;
    char *token = strtok(bp, "\t\n");
    char tmp_lable[10];
    if(token == NULL// token이 NULL이면 다음 줄 읽어옴
        return 0;
    if(p2_start == 0){ //p2_start가 0이면 실행 
        for(i=0; i<3; i++){
            if(i==2)
                strcpy(tmp_lable, token);
            token = strtok(NULL"\t\n");
        }
        if(!strcmp(token, "START")){
            printf("H^%s^",tmp_lable);
            fprintf(Sample_o, "H%s", tmp_lable);
            token = strtok(NULL"\t\n");
            p2_start = strtoul(token, NULL16);
            p2_locctr = p2_start;
            printf("%.6X^%.6X\n", p2_locctr, locctr);
            fprintf(Sample_o, "%.6X%.6X\n", p2_locctr, locctr);
            printf("T^%.6X^",p2_locctr);
            fprintf(Sample_o, "T%.6X",p2_locctr);
            printf("%.2X^",symtab[b].symloc-p2_start);
            fprintf(Sample_o, "%.2X", symtab[b].symloc-p2_start);
            b += 7;
        }
        else
            return 4;
    }
    else {
        p2_locctr = symtab[a].symloc;
        a++;
        for(i=0; i<4; i++){
            if(l==10){
                printf("\n");
                fputs("\n",Sample_o);
                printf("T^");
                fputs("T",Sample_o);
                printf("%.6X^",p2_locctr);
                fprintf(Sample_o, "%.6X",p2_locctr);
                printf("%.2X^",symtab[b].symloc-p2_locctr);
                fprintf(Sample_o, "%.2X", symtab[b].symloc-p2_locctr);
                b += 13;
                l++;
            }
            else if(l==18){
                p2_locctr = symtab[a+2].symloc;
                printf("\n");
                fputs("\n",Sample_o);
                printf("T^");
                fputs("T",Sample_o);
                printf("%.6X^",p2_locctr);
                fprintf(Sample_o, "%.6X",p2_locctr);
                printf("%.2X^",symtab[b].symloc-p2_locctr);
                fprintf(Sample_o, "%.2X", symtab[b].symloc-p2_locctr);
                b += 10;
                l++;
            }
            else if(l==32){
                printf("\n");
                fputs("\n",Sample_o);
                printf("T^");
                fputs("T",Sample_o);
                printf("%.6X^",p2_locctr);
                fprintf(Sample_o, "%.6X",p2_locctr);
                printf("%.2X^",symtab[b].symloc-p2_locctr);
                fprintf(Sample_o, "%.2X", symtab[b].symloc-p2_locctr);
                b += 3;
                l++;
            }
            else if(l==43){
                printf("\n");
                fputs("\n",Sample_o);
                printf("T^");
                fputs("T",Sample_o);
                printf("%.6X^",p2_locctr);
                fprintf(Sample_o, "%.6X",p2_locctr);
                printf("%.2X^",symtab[b].symloc-p2_locctr);
                fprintf(Sample_o, "%.2X", symtab[b].symloc-p2_locctr);
                l++;
            }
            switch (i){
                case 0:
                    break;
                case 1:
                    break;
                case 2:
                    break;
                case 3:
                    if(!strcmp(token, "END")){
                        printf("\nE^");
                        fputs("\nE",Sample_o);
                        printf("%.6X",p1_start);
                        fprintf(Sample_o, "%.6X\n",p1_start);
                        printf("^\n");
                        return 1;
                    }
                    else if(!strcmp(token ,"WORD")){
                        token = strtok(NULL"\t\n");
                        printf("%.6X", atoi(token));
                        fprintf(Sample_o, "%.6X", atoi(token));
                        printf("^");
                        l++;
                        break;
                    }
                    else if(!strcmp(token ,"BYTE")){
                        token = strtok(NULL"\t\n");
                        if(token[0== 'X') { // oprand의 첫부분이 X이면 
                            for(k=2; k< strlen(token)-1; k++){  // ex) X'F1' 이면 X,',' 세개를 제외한 길이 
                                printf("%c", token[k]);
                                fprintf(Sample_o, "%c", token[k]);    
                            }
                            printf("^");
                            l++;
                            break;
                        }
                        else if(token[0== 'C') { // oprand의 첫 부분이 C이면
                            for(k=2; k< strlen(token)-1; k++){ // ex) C'EOF' 이면 X,',' 세개를 제외한 길이
                                printf("%X", token[k]);
                                fprintf(Sample_o, "%X", token[k]);    
                            }
                            printf("^");
                            l++;
                        break;
                        }
                    }
                    else if(!strcmp(token, "RESB"|| !strcmp(token, "RESW")) {
                        l++;
                        break;
                    }
                    for(k=0; k<sizeof(optab)/12; k++){
                        if(!strcmp(optab[k].name, token)){ // opcode가 OPTAB에 정의된 단어이면 
                            token = strtok(NULL",\t\n"); // 다음 token을 받아온다 (oprand)
                            tmp_token = strtok(NULL"\t\n");
                            if(tmp_token != NULL && strcmp(token, " ")){ // oprand가 존재하면서 콤마 뒷부분이 있는 경우 
                                if(!strcmp(tmp_token, "X")){ // 콤마 뒷부분이 X이면 
                                    for(n=0; n<j; n++){ 
                                        if(!strcmp(symtab[n].lable, token)){
                                            printf("%.2X%.4X",optab[k].len, symtab[n].symloc+XR);
                                            fprintf(Sample_o, "%.2X%.4X",optab[k].len, symtab[n].symloc+XR);
                                            n=0;
                                            l++;
                                            if(l!=10)
                                                printf("^");
                                            else if(l!=17)
                                                printf("^");
                                            else if(l!=27)
                                                printf("^");
                                            break;
                                        }  
                                    }
                                    if(n==j){
                                        printf("%.2X0000",optab[k].len);
                                        fprintf(Sample_o, "%.2X0000",optab[k].len);
                                        l++;
                                        if(l!=10)
                                            printf("^");
                                        else if(l!=17)
                                            printf("^");
                                        else if(l!=27)
                                            printf("^");
                                    }
                                }
                            }
                            else if(!strcmp(token, " ")){ // oprand가 NULL이면 메시지 출력
                                printf("%.2X0000",optab[k].len);
                                fprintf(Sample_o, "%.2X0000",optab[k].len);
                                l++;
                                if(l!=10)
                                    printf("^");
                                else if(l!=17)
                                    printf("^");
                                else if(l!=27)
                                    printf("^");
                            }
                            else // oprand가 있으면 SYMTAB에 있는지 판독 
                                for(n=0; n<j; n++){ 
                                    if(!strcmp(symtab[n].lable, token)){
                                        printf("%.2X%.4X",optab[k].len, symtab[n].symloc);
                                        fprintf(Sample_o, "%.2X%.4X",optab[k].len, symtab[n].symloc);
                                        n=0;
                                        l++;
                                        if(l!=10)
                                            printf("^");
                                        else if(l!=17)
                                            printf("^");
                                        else if(l!=27)
                                            printf("^");
                                        break;
                                    }  
                                }
                                if(n==j){
                                    printf("%.2X0000",optab[k].len);
                                    fprintf(Sample_o, "%.2X0000",optab[k].len);
                                    l++;
                                    if(l!=10)
                                        printf("^");
                                    else if(l!=17)
                                        printf("^");
                                    else if(l!=27)
                                        printf("^");
                                }
                                
                        }
                        else if(k==j)
                            printf("not found opcode...\n");
                        
                    }
                    break;
            }
            token = strtok(NULL"\t\n");        
        }
    }
    return 0;
}
 
int pass1(char *bp) {
    int i,k;
    flag3=0;
    int tmp_loc=1;
    char tmp_lable[20];
    char *token = strtok(bp, "\t\n");
    if(token == NULL){ // token이 NULL이면 출력 파일에 \n 출력 후 다음 줄 읽어옴  
        return 2;
    }
    
    if(locctr==0){ // 위치 계수기가 0일때만 실행 
        for(i=0; i<2;i++){
            if(i==1// tmp_lable에 lable을 임시로 저장, opcode 부분이 START가 아니면 사용 안함 
                strcpy(tmp_lable, token);
            token = strtok(NULL"\t\n");
        }
        if(!strcmp(token, "START")){ // opcode 부분이 START이면 locctr에 oprand 부분의 주소를 저장하고 다음 줄 읽어옴 
            strcpy(symtab[j].lable, tmp_lable); // 임시 lable을 SYMTAB에 저장 
            strcpy(symtab[j].opcode, token); // 현재 라인의 opcode도 SYMTAB에 저장 
            token = strtok(NULL"\t\n"); 
            strcpy(symtab[j].oprand, token); // 현재 라인의 oprand도 SYMTAB에 저장  
            locctr = strtoul(symtab[j].oprand, NULL16); // 위치 계수기에 oprand 부분을 저장, oprand는 16진수이기 때문에 locctr에 10진수로 바꾸어서 저장 
            p1_start = locctr;
            symtab[j].symloc = locctr;
        }
        else // opcode 부분이 START가 아니면 아무것도 하지않고 opcode 부분이 START 일 때 까지 다음 줄 읽어옴
            return 4;
    }
    else { // 위치 계수기가 0이 아니면 실행
        symtab[j].symloc = locctr;
        for(i=0; i<3; i++){
            if(!strcmp(token, " ")){ 
                token = strtok(NULL"\t\n");
                if(i==1)
                    flag3 = 1// space를 파일의 lable에 출력하기 위한 flag 
                continue;
            }
            switch (i){
                case 0:
                    break;
                case 1// lable
                    for(k=0; k<j; k++){
                        if(!strcmp(token, symtab[k].lable)){ // 현재 token을 lable 개수만큼 SYMTAB과 비교해서 같으면 중복 메시지 출력 후 라인 무시 
                            printf("----- 중복 -----\n");
                            return 4;
                        }
                    }
                    strcpy(symtab[j].lable, token);
                    symtab[j].symloc = locctr;
                    break;    
                case 2// opcode, oprand
                    strcpy(symtab[j].opcode, token); // opcode 저장 
                    if(!strcmp(symtab[j].opcode, "END")){ // opcode가 END이면 oprand 저장 및 프로그램 길이 구한 후 줄 읽기 종료
                        token = strtok(NULL"\t\n");
                        strcpy(symtab[j].oprand, token);
                        cnt += 5;
                        j++;
                        locctr -= symtab[0].symloc;
                        return 3;
                    }
                    else if (!strcmp(symtab[j].opcode, "BYTE")){ // opcode가 BYTE이면 oprand 첫 부분이 무엇인지 구분 
                        token = strtok(NULL"\t\n");
                        strcpy(symtab[j].oprand, token);
                        if(symtab[j].oprand[0== 'C'){
                            tmp_loc = strlen(symtab[j].oprand)-1-2// ex) C'EOF' 이면 C,',' 세개를 제외한 개수 3 
                        }
                        else if(symtab[j].oprand[0== 'X'){ // oprand의 첫 글자가 X 이면 주소 + 1 
                            tmp_loc = 1
                        }
                        locctr += tmp_loc;
                        break;
                    }
                    else if(!strcmp(symtab[j].opcode, "RESB")){ // opcode가 RESB이면 oprand의 10진수 숫자를 locctr에 더함
                        token = strtok(NULL"\t\n");
                        strcpy(symtab[j].oprand, token);
                        tmp_loc = atoi(symtab[j].oprand);
                        locctr += tmp_loc;
                    }
                    else if(!strcmp(symtab[j].opcode, "RESW")){ // opcode가 RESW이면 oprand의 10진수 숫자에 3을 곱해 locctr에 더함
                        token = strtok(NULL"\t\n");
                        strcpy(symtab[j].oprand, token);
                        locctr += 3 * atoi(symtab[j].oprand);
                    }
                    else if(!strcmp(symtab[j].opcode, "WORD")){ // opcode가 WORD이면 locctr에 3을 더함 
                        token = strtok(NULL"\t\n");
                        strcpy(symtab[j].oprand, token);
                        locctr += 3;
                    }
                    else // 나머지 opcode가 나오면 OPTAB에 정의되어 있는 단어인지 판독  
                        for(i=0; i<sizeof(optab)/12; i++){ 
                            if(!strcmp(optab[i].name, symtab[j].opcode)){ // opcode가 OPTAB에 정의되어 있는 단어이면 locctr에 3을 더하고 정상 flag 설정  
                                token = strtok(NULL"\t\n");
                                strcpy(symtab[j].oprand, token); 
                                locctr += 3;
                                flag2 = 1;
                            }
                        }
                        if(i==sizeof(optab)/12 && flag2==0){ // OPTAB에 정의된 단어가 아니면 오류메시지 출력 후 다음 줄 읽어옴  
                            printf("Undefined Word...\n");
                            return 4;
                        }
                    break;
                }
            token = strtok(NULL"\t\n"); // NULL 다음 token 읽어옴 
        }
    }
    j++// SYMTAB의 lable 개수를 하나 더함 
    if(cnt == 190 || cnt == 105)
        cnt += 20;
    else cnt += 5;
    return 0
}
 
int fprint(int res, FILE *sample_1st){ // pass1을 txt파일로 출력 
    if(flag == 2){
        fputs("\n", sample_1st); // 입력 파일에 빈 줄이 나오면 txt파일에 빈 줄 출력
    } 
    else if(!flag){ // opcode에 이상이 없으면 파일에 출력 (flag가 0이면 실행) 
        if(flag3 == 1 ){ // lable 부분이 공백이면 파일의 lable 부분에 space 출력 (나중에 구분하기 위함) 
            fprintf(sample_1st,"%3d\t%X\t%s%s\t%s\n", cnt, symtab[j-1].symloc, " \t", symtab[j-1].opcode, symtab[j-1].oprand); 
        }
        else 
            fprintf(sample_1st,"%3d\t%X\t%s\t%s\t%s\n", cnt, symtab[j-1].symloc, symtab[j-1].lable, symtab[j-1].opcode, symtab[j-1].oprand); 
    }
    else if(flag == 3){ // opcode에 END가 나오면 현재 주소를 제외하고 파일에 출력하고 종료
        fprintf(sample_1st,"%3d\t%s%s%s\t%s\n", cnt, " \t"" \t", symtab[j-1].opcode, symtab[j-1].oprand);  
        return 4;
    }
    else if(flag == 4){ // 아무것도 하지않고 다음 줄 읽음 
    }
    else if(flag) // flag가 0,2,3,4가 아니면 break 
        return 4;
    return res;
}
 

 

설명 :

위에 올려둔 알고리즘을 참고하여 차근차근 작성하시면 될 것 같습니다.

제가 작성한 코드는 완벽한 코드가 아니므로 2 pass assembler를 만드시는데 참고용으로 사용하세요.

혹시 더 자세한 설명을 원하시면 아래 링크를 참고하시면 됩니다.

http://genderi.org/unit--2-assembler-design.html

 

Unit- 2 Assembler Design

The input to the assembler is a source code written in assembly language (using mnemonics) and the output is the object code. The design of an assembler depends upon the machine architecture as the language used is mnemonic language

genderi.org