bufr2synop 0.24.0
bufrdeco_csv.c
Go to the documentation of this file.
1/***************************************************************************
2 * Copyright (C) 2013-2022 by Guillermo Ballester Valor *
3 * gbv@ogimet.com *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20/*!
21 \file bufrdeco_csv.c
22 \brief This file has the code of useful routines for read or write csv formatted lines
23*/
24#include "bufrdeco.h"
25
/*
   State flags used by parse_csv_line2(). Every flag is a distinct bit so
   several of them can be combined in a single int.
*/
#define CSV_WAIT_ITEM 1        /*!< Waiting for the first character of the next item */
#define CSV_FINISHED_ITEM 2    /*!< Tested when the end of line is found; never set in this file */
#define CSV_IS_ITEM 4          /*!< Inside a non quoted item */
#define CSV_IS_SEPARATOR 8     /*!< Not referenced by the routines of this file */
#define CSV_IS_CITED 16        /*!< Inside a quoted item */
#define CSV_WAIT_SEPARATOR 32  /*!< A quoted item has been closed, waiting for separator or end of line */
#define CSV_IS_DOUBLE_QUOTED 64 /*!< Toggled every time a doubled quote is found inside a quoted item */

const char CSV_SPACE[] = " \t\r"; /*!< Characters treated as blanks around non quoted items */
const char CSV_SEPARATOR = ',';   /*!< Character which separates two items */
const char CSV_CITE = '\"';       /*!< Character which opens and closes a quoted item */
const char CSV_FINAL = '\n';      /*!< Character which marks the end of a csv line */
38
39
40/*!
41 \fn char * csv_quoted_string( char *out, char *in)
42 \brief Transform a string to a quoted string to be inserted in a csv file
43 \param out resulting string
44 \param in input string
45
46 \return If problem returns NULL, otherwise out
47*/
48char * csv_quoted_string ( char *out, char *in )
49{
50 size_t i = 0, j = 0;
51
52 if ( in == NULL || out == NULL)
53 return NULL;
54
55 if ( in[0] == 0 )
56 return in;
57
58 out[j++] = '\"';
59 while ( in[i] && j < ( CSV_MAXL - 1 ) && i < CSV_MAXL )
60 {
61 if ( in[i] == '\"' )
62 {
63 out[j++] = '\"';
64 out[j++] = '\"';
65 i++;
66 }
67 else
68 out[j++] = in[i++];
69 }
70 out[j++] = '\"';
71 out[j] = 0; // end of string
72 return out;
73}
74
75
76/*!
77 \fn int parse_csv_line2(int *nt, char *tk[], char *lin)
78 \brief Parse a csv line
79 \param nt pointer to a integer. On success is the number of items found
80 \param tk array of pointers. Every pointer is a item on success
81 \param lin input line which is modified in this routine to be splitted into items
82 \return On success return 0, otherwise -1
83
84 NOTE that input line is modified
85*/
86int parse_csv_line2 ( int *nt, char *tk[], char *lin )
87{
88 size_t i, j, k = 0, l, latest_char = 0;
89 int flag;
90 char c, caux[CSV_MAXL];
91
92 bufrdeco_assert (lin != NULL && tk != NULL && nt != NULL);
93
94 l = strlen ( lin );
95 if ( l >= CSV_MAXL || l == 0 )
96 return -1;
97
98 // copy original string. In this case the copy is safe
99 strcpy ( caux, lin );
100
101 flag = CSV_WAIT_ITEM;
102
103 for ( i = 0, j = 0, k = 0; i < l; i++ )
104 {
105 c = caux[i]; // original char
106
107 if ( ( flag & CSV_IS_CITED ) == 0 )
108 {
109 if ( flag & CSV_WAIT_ITEM )
110 {
111 if ( strchr ( CSV_SPACE, c ) != NULL )
112 {
113 lin[j++] = c; // copy to target
114 continue;
115 }
116 else if ( c == CSV_CITE )
117 {
118 lin[j++] = '\0'; // copy to target
119 flag |= CSV_IS_CITED;
120 continue;
121 }
122 else if ( c == CSV_SEPARATOR )
123 {
124 /* item vacio */
125 tk[k++] = lin + j;
126 lin[j++] = '\0';
127 flag = CSV_WAIT_ITEM;
128 continue;
129 }
130 else if ( c == CSV_FINAL )
131 {
132 if ( flag & CSV_FINISHED_ITEM )
133 {
134 *nt = k;
135 return 0;
136 }
137 /* al menos un token, vacio */
138 tk[k++] = lin + j;
139 lin[j] = '\0';
140 *nt = k;
141 return 0;
142 }
143 else
144 {
145 latest_char = j;
146 tk[k] = lin + j;
147 lin[j++] = c;
148 flag = CSV_IS_ITEM;
149 continue;
150 }
151 }
152 else if ( flag & CSV_IS_ITEM )
153 {
154 if ( strchr ( CSV_SPACE, c ) != NULL )
155 {
156 lin[j++] = c;
157 continue;
158 }
159 else if ( c == CSV_SEPARATOR )
160 {
161 lin[latest_char + 1] = '\0';
162 j = latest_char + 2;
163 k++;
165 continue;
166 }
167 else if ( c == CSV_FINAL )
168 {
169 lin[latest_char + 1] = '\0';
170 k++;
171 *nt = k;
172 return 0;
173 }
174 else
175 {
176 latest_char = j++;
177 }
178 }
179 else if ( flag & CSV_WAIT_SEPARATOR )
180 {
181 if ( c == CSV_SEPARATOR )
182 {
183 lin[j++] = '\0';
184 k++;
185 flag = CSV_WAIT_ITEM;
186 }
187 else if ( c == CSV_FINAL )
188 {
189 lin[j++] = '\0';
190 *nt = k + 1;
191 return 0;
192 }
193 continue;
194 }
195 }
196 else /* CITED */
197 {
198 if ( flag & CSV_WAIT_ITEM )
199 {
200 tk[k] = lin + j;
201 flag &= ( ~CSV_WAIT_ITEM ); // Clean wait item bit
202 }
203 if ( c == CSV_CITE )
204 {
205 if ( caux[i + 1] == CSV_CITE )
206 {
207 if ( flag & CSV_IS_DOUBLE_QUOTED )
208 {
209 flag &= ( ~CSV_IS_DOUBLE_QUOTED ) ; // Clean double quoted bit
210 lin[j++] = '"';
211 i++;
212 }
213 else
214 {
215 flag |= CSV_IS_DOUBLE_QUOTED;
216 lin[j++] = '"';
217 i++;
218 }
219 continue;
220 }
221 else
222 {
223 lin[j++] = '\0';
224 flag = CSV_WAIT_SEPARATOR;
225 }
226 }
227 else if ( c == CSV_FINAL )
228 {
229 lin[j++] = '\0';
230 *nt = k + 1;
231 return 0;
232 }
233 else
234 {
235 lin[j++] = c;
236 }
237 continue;
238 }
239 }
240 return -1;
241}
242
243/*!
244 \fn int parse_csv_line(int *nt, char *tk[], char *lin)
245 \brief Parse a csv line
246 \param nt pointer to a integer. On success is the number of items found
247 \param tk array of pointers. Every pointer is a item on success
248 \param lin input line which is modified in this routine to be splitted into items
249 \return On success return 0, otherwise -1
250
251 This is an optimization of routine parse_csv_line2(). Here we suppose:
252 -All no void items are closed between "" and separed by comas ','
253 -No " in items.
254 -No blank spaces at the end nor begin of items
255
256 NOTE that input line is modified
257*/
258int parse_csv_line ( int *nt, char *tk[], char *lin )
259{
260 char *cq[CSV_MAXL / 4], *cc[CSV_MAXL / 4], *c0;
261 buf_t nc, nq, i, j;
262
263 //bufrdeco_assert (lin != NULL && tk != NULL && nt != NULL);
264
265 *nt = 0;
266
267 // clean final new line)
268 if ((c0 = strchr(lin, CSV_FINAL)) != NULL)
269 *c0 = '\0';
270
271 c0 = lin;
272 nq = 0;
273 while (c0 != NULL && *c0 && nq < (CSV_MAXL / 4))
274 {
275 if ((c0 = strchr(c0, CSV_CITE)) != NULL)
276 {
277 cq[nq++] = c0++;
278 }
279 }
280
281 if (nq % 2)
282 return -1; // Problem, number of quotes are non paired
283
284 c0 = lin;
285 nc = 0;
286 while (c0 != NULL && *c0 && nc < (CSV_MAXL / 4))
287 {
288 if ((c0 = strchr(c0, CSV_SEPARATOR)) != NULL)
289 {
290 j = 0;
291 if (nq)
292 {
293 for (; j < nq ; j+=2)
294 if ((cq[j] < c0) && (c0 < cq[j + 1]))
295 {
296 j = nq;// is a comma between two cites. Ignored
297 c0++;
298 }
299 }
300 if (j == nq)
301 {
302 // is a comma between two items.
303 cc[nc++] = c0++;
304 }
305 }
306 }
307
308 // Now go to tokens, in this pass, still CSV_CITE are in items
309 c0 = lin;
310 if (nc == 0)
311 tk[(*nt)++] = c0;
312 else
313 {
314 for (i = 0; i < nc; i++)
315 {
316 if (cc[i] == c0)
317 {
318 tk[(*nt)++] = c0;
319 *(c0) = '\0';
320 c0++;
321 }
322 else
323 {
324 tk[(*nt)++] = c0;
325 *(cc[i]) = '\0';
326 c0 = cc[i] + 1;
327 }
328 }
329 }
330 tk[(*nt)++] = c0;
331
332 // fix the tokens supresing the first CSV_CITE if any
333 for (i = 0; i < (buf_t)(*nt); i++)
334 {
335 if( *(tk[i]) == CSV_CITE)
336 (tk[i])++;
337 }
338
339 // And then supress all CSV_CITE (also in the end of every item)
340 for (i = 0; i < nq ; i++)
341 *(cq[i]) = '\0';
342
343 return 0;
344}
Include header file for bufrdeco library.
uint32_t buf_t
Type to set offsets and dimension of arrays or counters used in bufrdeco.
Definition: bufrdeco.h:346
#define bufrdeco_assert(__my_expr__)
Check a expression and exit if it fails.
Definition: bufrdeco.h:374
#define CSV_MAXL
Maximum length in a string to be parsed as csv.
Definition: bufrdeco.h:128
const char CSV_CITE
Definition: bufrdeco_csv.c:36
#define CSV_IS_ITEM
Definition: bufrdeco_csv.c:28
const char CSV_SPACE[]
Definition: bufrdeco_csv.c:34
const char CSV_FINAL
Definition: bufrdeco_csv.c:37
#define CSV_IS_CITED
Definition: bufrdeco_csv.c:30
int parse_csv_line(int *nt, char *tk[], char *lin)
Parse a csv line.
Definition: bufrdeco_csv.c:258
const char CSV_SEPARATOR
Definition: bufrdeco_csv.c:35
#define CSV_FINISHED_ITEM
Definition: bufrdeco_csv.c:27
#define CSV_WAIT_SEPARATOR
Definition: bufrdeco_csv.c:31
char * csv_quoted_string(char *out, char *in)
Transform a string to a quoted string to be inserted in a csv file.
Definition: bufrdeco_csv.c:48
int parse_csv_line2(int *nt, char *tk[], char *lin)
Parse a csv line.
Definition: bufrdeco_csv.c:86
#define CSV_IS_DOUBLE_QUOTED
Definition: bufrdeco_csv.c:32
#define CSV_WAIT_ITEM
Definition: bufrdeco_csv.c:26