The translation is temporarily closed for contributions due to maintenance, please come back later.

Source string Read only

(itstool) path: sect3/para
Context English State
To get the 11th field of each record, I can now do:
<prompt>%</prompt> <userinput>csv '-t;' <replaceable>data.csv</replaceable> | awk '-F;' '{print $11}'</userinput>
The code stores the options (except for the file descriptors) in <varname role="register">EDX</varname>: the comma in <varname role="register">DH</varname>, the new separator in <varname role="register">DL</varname>, and the flag for the <parameter>-p</parameter> option in the highest bit of <varname role="register">EDX</varname>, so a check of its sign quickly tells us what to do.
Here is the code:
;;;;;;; csv.asm ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; Convert a comma-separated file to a something-else separated file.
;
; Started: 31-May-2001
; Updated: 1-Jun-2001
;
; Copyright (c) 2001 G. Adam Stanislav
; All rights reserved.
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%include 'system.inc'

; Size in bytes of each I/O buffer (ibuffer and obuffer). It is also the
; byte count requested from every read and the fill level at which putchar
; flushes the output buffer.
%define BUFSIZE 2048

section .data
; Descriptors used for all input and output. They start out as the standard
; streams and are overwritten when -i / -o name a file.
fd.in   dd stdin
fd.out  dd stdout

; Messages written to stderr by usage, ierr and oerr. Each length constant
; is computed as $-label, so it must directly follow its string.
; The usage text uses literal '<' and '>' (the XML entities that had crept
; into the listing would have been printed verbatim).
usg     db 'Usage: csv [-t<delim>] [-c<comma>] [-p] [-o <outfile>] [-i <infile>]', 0Ah
usglen  equ $-usg
iemsg   db "csv: Can't open input file", 0Ah
iemlen  equ $-iemsg
oemsg   db "csv: Can't create output file", 0Ah
oemlen  equ $-oemsg

section .bss
; Uninitialized I/O buffers, BUFSIZE bytes each.
ibuffer resb BUFSIZE ; filled by read, consumed one byte at a time by getchar
obuffer resb BUFSIZE ; filled by putchar, emptied by write

section .text
align 4
; ierr -- the input file could not be opened: complain on stderr and
; terminate with exit status 1. Never returns.
ierr:
        ; Arguments are pushed last-to-first: length, buffer, descriptor.
        push    dword iemlen            ; number of bytes in the message
        push    dword iemsg             ; address of the message
        push    dword stderr            ; where to write it
        sys.write

        push    dword 1                 ; exit status: input failure
        sys.exit

align 4
; oerr -- the output file could not be created: complain on stderr and
; terminate with exit status 2. Never returns.
oerr:
        ; Arguments are pushed last-to-first: length, buffer, descriptor.
        push    dword oemlen            ; number of bytes in the message
        push    dword oemsg             ; address of the message
        push    dword stderr            ; where to write it
        sys.write

        push    dword 2                 ; exit status: output failure
        sys.exit

align 4
; usage -- the command line was not understood: print the usage summary on
; stderr and terminate with exit status 3. Never returns.
usage:
        ; Arguments are pushed last-to-first: length, buffer, descriptor.
        push    dword usglen            ; number of bytes in the message
        push    dword usg               ; address of the message
        push    dword stderr            ; where to write it
        sys.write

        push    dword 3                 ; exit status: bad command line
        sys.exit

align 4
global _start
;------------------------------------------------------------------------
; _start -- program entry point.
;
; Phase 1 (.arg .. .c): pop the argument pointers off the stack one at a
;   time and record the options:
;     -o <file> / -o<file>   create the output file, descriptor -> fd.out
;     -i <file> / -i<file>   open the input file,    descriptor -> fd.in
;     -p                     set bit 31 of EDX (keep the first line)
;     -t<delim>              DL = output separator (default: tab, 9)
;     -c<comma>              DH = input separator  (default: ',')
;   Anything else, a repeated -o/-i, or a missing value jumps to usage.
;
; Phase 2 (.init .. .qloop): copy input to output, replacing every
;   unquoted DH byte with DL and dropping the double quotes themselves.
;   The loop has no exit of its own; the program ends inside read when
;   the input is exhausted.
;
; Registers during phase 2:
;   EDX  options, as above (must survive the whole run)
;   ESI  read position in ibuffer      EBX  input bytes not yet consumed
;   EDI  write position in obuffer     ECX  output bytes not yet written
;
; Note: the shift operators below are written as '<<'. The entity-escaped
; form that had leaked into the listing is not valid NASM.
;------------------------------------------------------------------------
_start:
        add     esp, byte 8             ; discard argc and argv[0]
        mov     edx, (',' << 8) | 9     ; defaults: DH = ',', DL = tab

.arg:
        pop     ecx
        or      ecx, ecx
        je      near .init              ; no more arguments

        ; ECX contains the pointer to an argument
        cmp     byte [ecx], '-'
        jne     usage

        inc     ecx
        mov     ax, [ecx]               ; AL = option letter, AH = byte after it

.o:
        cmp     al, 'o'
        jne     .i

        ; Make sure we are not asked for the output file twice
        cmp     dword [fd.out], stdout
        jne     usage

        ; Find the path to output file - it is either at [ECX+1],
        ; i.e., -ofile --
        ; or in the next argument,
        ; i.e., -o file

        inc     ecx
        or      ah, ah
        jne     .openoutput             ; path is glued to the option
        pop     ecx                     ; otherwise take the next argument
        jecxz   usage                   ; ... which must exist

.openoutput:
        push    dword 420               ; file mode (644 octal)
        push    dword 0200h | 0400h | 01h
        ; O_CREAT | O_TRUNC | O_WRONLY
        push    ecx
        sys.open
        jc      near oerr

        add     esp, byte 12            ; drop the three sys.open arguments
        mov     [fd.out], eax
        jmp     short .arg

.i:
        cmp     al, 'i'
        jne     .p

        ; Make sure we are not asked twice
        cmp     dword [fd.in], stdin
        jne     near usage

        ; Find the path to the input file
        inc     ecx
        or      ah, ah
        jne     .openinput              ; path is glued to the option
        pop     ecx                     ; otherwise take the next argument
        or      ecx, ecx
        je      near usage              ; ... which must exist

.openinput:
        push    dword 0                 ; O_RDONLY
        push    ecx
        sys.open
        jc      near ierr               ; open failed

        add     esp, byte 8             ; drop the two sys.open arguments
        mov     [fd.in], eax
        jmp     .arg

.p:
        cmp     al, 'p'
        jne     .t
        or      ah, ah
        jne     near usage              ; -p takes no value
        or      edx, 1 << 31            ; sign bit of EDX = "preserve first line"
        jmp     .arg

.t:
        cmp     al, 't'                 ; redefine output delimiter
        jne     .c
        or      ah, ah
        je      near usage              ; -t needs a character
        mov     dl, ah
        jmp     .arg

.c:
        cmp     al, 'c'
        jne     near usage              ; last known option: anything else is an error
        or      ah, ah
        je      near usage              ; -c needs a character
        mov     dh, ah
        jmp     .arg

align 4
.init:
        sub     eax, eax
        sub     ebx, ebx                ; input buffer is empty
        sub     ecx, ecx                ; output buffer is empty
        mov     edi, obuffer

        ; See if we are to preserve the first line
        or      edx, edx
        js      .loop                   ; bit 31 set by -p: keep it

.firstline:
        ; get rid of the first line
        call    getchar
        cmp     al, 0Ah
        jne     .firstline

.loop:
        ; read a byte from the input
        call    getchar

        ; is it a comma (or whatever the user asked for)?
        cmp     al, dh
        jne     .quote

        ; Replace the comma with a tab (or whatever the user wants)
        mov     al, dl

.put:
        call    putchar
        jmp     short .loop

.quote:
        cmp     al, '"'
        jne     .put

        ; Print everything until you get another quote or EOL. If it
        ; is a quote, skip it. If it is EOL, print it.
.qloop:
        call    getchar
        cmp     al, '"'
        je      .loop

        cmp     al, 0Ah
        je      .put

        call    putchar
        jmp     short .qloop

align 4
; getchar -- fetch the next input byte.
;
; In:   ESI = read position in the input buffer
;       EBX = number of input bytes not yet consumed
; Out:  AL  = the byte; ESI and EBX updated
; When the buffer is exhausted it is refilled through read, which does not
; come back once the input has ended.
getchar:
        test    ebx, ebx                ; anything left in the buffer?
        jnz     .fetch

        call    read                    ; no: refill it first

.fetch:
        lodsb                           ; AL = next byte of input
        dec     ebx                     ; one byte fewer remaining
        ret

;------------------------------------------------------------------------
; read -- flush pending output, then refill the input buffer.
;
; In:   ECX = number of bytes waiting in the output buffer
;       EDI = next free position in the output buffer
; Out:  ESI = ibuffer (start of the fresh data)
;       EBX = number of bytes read (non-zero)
;       EAX = 0
;
; Does not return when the input is exhausted: .done flushes the output,
; closes both descriptors and exits with status 0.
; Does not return when the read fails either: the program exits with
; status 1. Previously the error value left in EAX was used as a byte
; count, so stale buffer contents were processed as input.
;------------------------------------------------------------------------
read:
        jecxz   .read                   ; output buffer empty: nothing to flush
        call    write                   ; flush before we may block on input

.read:
        push    dword BUFSIZE
        mov     esi, ibuffer
        push    esi
        push    dword [fd.in]
        sys.read
        ; Failure is signalled through the carry flag, the same convention
        ; _start relies on after sys.open. It has to be tested here, before
        ; the ADD below overwrites the flags.
        jc      .error
        add     esp, byte 12            ; drop the three sys.read arguments
        mov     ebx, eax                ; EBX = bytes now available
        or      eax, eax
        je      .done                   ; zero bytes: end of input
        sub     eax, eax
        ret

.error:
        ; The output buffer was already flushed on entry, so nothing is
        ; lost by leaving right away. The stack is not cleaned up because
        ; the process terminates here.
        push    dword 1                 ; return failure
        sys.exit

align 4
.done:
        call    write                   ; flush output buffer

        ; close files
        push    dword [fd.in]
        sys.close

        push    dword [fd.out]
        sys.close

        ; return success
        push    dword 0
        sys.exit

align 4
; putchar -- append AL to the output buffer.
;
; In:   AL  = byte to store
;       EDI = write position in the output buffer
;       ECX = number of bytes already buffered
; Out:  EDI and ECX updated; once ECX reaches BUFSIZE the buffer is
;       handed to write, which returns straight to our caller.
putchar:
        stosb                           ; store AL at the write position
        inc     ecx
        cmp     ecx, BUFSIZE
        jne     .room                   ; still space left: done
        jmp     write                   ; buffer full: flush (tail jump)

.room:
        ret

align 4
; write -- send the buffered output to fd.out.
;
; In:   ECX = number of buffered bytes
;       EDI = position just past the last buffered byte
; Out:  if ECX was zero, nothing is touched; otherwise EDI is moved back
;       by ECX (the start of the buffered data) and EAX = ECX = 0.
; The result of sys.write is not examined.
write:
        jecxz   .skip                   ; nothing to write

        sub     edi, ecx                ; start of buffer
        push    ecx                     ; byte count
        push    edi                     ; buffer address
        push    dword [fd.out]          ; descriptor
        sys.write
        add     esp, byte 12            ; drop the three arguments

        xor     eax, eax
        xor     ecx, ecx                ; buffer is empty now

.skip:
        ret
Much of it is taken from <filename>hex.asm</filename> above. But there is one important difference: I no longer call <function>write</function> whenever I am outputting a line feed. Yet, the code can be used interactively.
I have found a better solution for the interactive problem since I first started writing this chapter. I wanted to make sure each line is printed out separately only when needed. After all, there is no need to flush out every line when used non-interactively.
The new solution I use now is to call <function>write</function> every time I find the input buffer empty. That way, when running in the interactive mode, the program reads one line from the user's keyboard, processes it, and sees its input buffer is empty. It flushes its output and reads the next line.
The Dark Side of Buffering
This change prevents a mysterious lockup in a very specific case. I refer to it as the <emphasis>dark side of buffering</emphasis>, mostly because it presents a danger that is not quite obvious.
It is unlikely to happen with a program like the <application>csv</application> above, so let us consider yet another filter: In this case we expect our input to be raw data representing color values, such as the <emphasis>red</emphasis>, <emphasis>green</emphasis>, and <emphasis>blue</emphasis> intensities of a pixel. Our output will be the negative of our input.
Such a filter would be very simple to write. Most of it would look just like all the other filters we have written so far, so I am only going to show you its inner loop:
; Inner loop of the negative filter: fetch a byte, invert it, emit it.
; There is no exit test here; the loop itself never falls through.
.loop:
call getchar
not al ; Create a negative: flip every bit of the byte in AL
call putchar
jmp short .loop
Because this filter works with raw data, it is unlikely to be used interactively.
But it could be called by image manipulation software. And, unless it calls <function>write</function> before each call to <function>read</function>, chances are it will lock up.
Here is what might happen:
The image editor will load our filter using the C function <function>popen()</function>.
It will read the first row of pixels from a bitmap or pixmap.
It will write the first row of pixels to the <emphasis>pipe</emphasis> leading to the <varname>fd.in</varname> of our filter.
Our filter will read each pixel from its input, turn it to a negative, and write it to its output buffer.
Our filter will call <function>getchar</function> to fetch the next pixel.
<function>getchar</function> will find an empty input buffer, so it will call <function>read</function>.
<function>read</function> will call the <function role="syscall">SYS_read</function> system call.
The <emphasis>kernel</emphasis> will suspend our filter until the image editor sends more data to the pipe.
The image editor will read from the other pipe, connected to the <varname>fd.out</varname> of our filter so it can set the first row of the output image <emphasis>before</emphasis> it sends us the second row of the input.
The <emphasis>kernel</emphasis> suspends the image editor until the kernel receives some output from our filter that it can pass on to the image editor.
At this point our filter waits for the image editor to send it more data to process, while the image editor is waiting for our filter to send it the result of the processing of the first row. But the result sits in our output buffer.
The filter and the image editor will continue waiting for each other forever (or, at least, until they are killed). Our software has just entered a deadlock, a close relative of the <link linkend="secure-race-conditions">race condition</link>.
This problem does not exist if our filter flushes its output buffer <emphasis>before</emphasis> asking the <emphasis>kernel</emphasis> for more input data.
Using the <acronym>FPU</acronym>
Strangely enough, most assembly language literature does not even mention the existence of the <acronym>FPU</acronym>, or <emphasis>floating point unit</emphasis>, let alone discuss programming it.

Loading…

No matching activity found.

Browse all component changes

Source information

Source string comment
(itstool) path: sect3/para
Flags
read-only
Source string location
book.translate.xml:12561
String age
a year ago
Source string age
a year ago
Translation file
books/developers-handbook.pot, string 1945