1 /**************************************************************************/
2 /* */
3 /* OCaml */
4 /* */
5 /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
6 /* */
7 /* Copyright 1996 Institut National de Recherche en Informatique et */
8 /* en Automatique. */
9 /* */
10 /* All rights reserved. This file is distributed under the terms of */
11 /* the GNU Lesser General Public License version 2.1, with the */
12 /* special exception on linking described in the file LICENSE. */
13 /* */
14 /**************************************************************************/
15
16 #define CAML_INTERNALS
17
18 /* Operations on strings */
19
20 #include <string.h>
21 #include <ctype.h>
22 #include <stdio.h>
23 #include <stdarg.h>
24 #include "caml/alloc.h"
25 #include "caml/fail.h"
26 #include "caml/memory.h"
27 #include "caml/mlvalues.h"
28 #include "caml/misc.h"
29
30 /* returns a number of bytes (chars) */
31 CAMLexport mlsize_t caml_string_length(value s)
32 {
33 mlsize_t temp;
34 temp = Bosize_val(s) - 1;
35 CAMLassert (Byte (s, temp - Byte (s, temp)) == 0);
36 return temp - Byte (s, temp);
37 }
38
39 /* returns a value that represents a number of bytes (chars) */
40 CAMLprim value caml_ml_string_length(value s)
41 {
42 mlsize_t temp;
43 temp = Bosize_val(s) - 1;
44 CAMLassert (Byte (s, temp - Byte (s, temp)) == 0);
45 return Val_long(temp - Byte (s, temp));
46 }
47
48 CAMLprim value caml_ml_bytes_length(value s)
49 {
50 return caml_ml_string_length(s);
51 }
52
53 CAMLexport int caml_string_is_c_safe (value s)
54 {
55 return strlen(String_val(s)) == caml_string_length(s);
56 }
57
58 /**
59 * [caml_create_string] is deprecated,
60 * use [caml_create_bytes] instead
61 */
62 CAMLprim value caml_create_string(value len)
63 {
64 mlsize_t size = Long_val(len);
65 if (size > Bsize_wsize (Max_wosize) - 1){
66 caml_invalid_argument("String.create");
67 }
68 return caml_alloc_string(size);
69 }
70
71 /* [len] is a value that represents a number of bytes (chars) */
72 CAMLprim value caml_create_bytes(value len)
73 {
74 mlsize_t size = Long_val(len);
75 if (size > Bsize_wsize (Max_wosize) - 1){
76 caml_invalid_argument("Bytes.create");
77 }
78 return caml_alloc_string(size);
79 }
80
81
82
83 CAMLprim value caml_string_get(value str, value index)
84 {
85 intnat idx = Long_val(index);
86 if (idx < 0 || idx >= caml_string_length(str)) caml_array_bound_error();
87 return Val_int(Byte_u(str, idx));
88 }
89
90 CAMLprim value caml_bytes_get(value str, value index)
91 {
92 return caml_string_get(str, index);
93 }
94
95 CAMLprim value caml_bytes_set(value str, value index, value newval)
96 {
97 intnat idx = Long_val(index);
98 if (idx < 0 || idx >= caml_string_length(str)) caml_array_bound_error();
99 Byte_u(str, idx) = Int_val(newval);
100 return Val_unit;
101 }
102
103 /**
104 * [caml_string_set] is deprecated,
105 * use [caml_bytes_set] instead
106 */
107 CAMLprim value caml_string_set(value str, value index, value newval)
108 {
109 return caml_bytes_set(str,index,newval);
110 }
111
112
113 CAMLprim value caml_string_get16(value str, value index)
114 {
115 intnat res;
116 unsigned char b1, b2;
117 intnat idx = Long_val(index);
118 if (idx < 0 || idx + 1 >= caml_string_length(str)) caml_array_bound_error();
119 b1 = Byte_u(str, idx);
120 b2 = Byte_u(str, idx + 1);
121 #ifdef ARCH_BIG_ENDIAN
122 res = b1 << 8 | b2;
123 #else
124 res = b2 << 8 | b1;
125 #endif
126 return Val_int(res);
127 }
128
129 CAMLprim value caml_bytes_get16(value str, value index)
130 {
131 return caml_string_get16(str,index);
132 }
133
134 CAMLprim value caml_string_get32(value str, value index)
135 {
136 int32_t res;
137 unsigned char b1, b2, b3, b4;
138 intnat idx = Long_val(index);
139 if (idx < 0 || idx + 3 >= caml_string_length(str)) caml_array_bound_error();
140 b1 = Byte_u(str, idx);
141 b2 = Byte_u(str, idx + 1);
142 b3 = Byte_u(str, idx + 2);
143 b4 = Byte_u(str, idx + 3);
144 #ifdef ARCH_BIG_ENDIAN
145 res = b1 << 24 | b2 << 16 | b3 << 8 | b4;
146 #else
147 res = b4 << 24 | b3 << 16 | b2 << 8 | b1;
148 #endif
149 return caml_copy_int32(res);
150 }
151
152 CAMLprim value caml_bytes_get32(value str, value index)
153 {
154 return caml_string_get32(str,index);
155 }
156
157 CAMLprim value caml_string_get64(value str, value index)
158 {
159 uint64_t res;
160 unsigned char b1, b2, b3, b4, b5, b6, b7, b8;
161 intnat idx = Long_val(index);
162 if (idx < 0 || idx + 7 >= caml_string_length(str)) caml_array_bound_error();
163 b1 = Byte_u(str, idx);
164 b2 = Byte_u(str, idx + 1);
165 b3 = Byte_u(str, idx + 2);
166 b4 = Byte_u(str, idx + 3);
167 b5 = Byte_u(str, idx + 4);
168 b6 = Byte_u(str, idx + 5);
169 b7 = Byte_u(str, idx + 6);
170 b8 = Byte_u(str, idx + 7);
171 #ifdef ARCH_BIG_ENDIAN
172 res = (uint64_t) b1 << 56 | (uint64_t) b2 << 48
173 | (uint64_t) b3 << 40 | (uint64_t) b4 << 32
174 | (uint64_t) b5 << 24 | (uint64_t) b6 << 16
175 | (uint64_t) b7 << 8 | (uint64_t) b8;
176 #else
177 res = (uint64_t) b8 << 56 | (uint64_t) b7 << 48
178 | (uint64_t) b6 << 40 | (uint64_t) b5 << 32
179 | (uint64_t) b4 << 24 | (uint64_t) b3 << 16
180 | (uint64_t) b2 << 8 | (uint64_t) b1;
181 #endif
182 return caml_copy_int64(res);
183 }
184
185 CAMLprim value caml_bytes_get64(value str, value index)
186 {
187 return caml_string_get64(str,index);
188 }
189
190 CAMLprim value caml_bytes_set16(value str, value index, value newval)
191 {
192 unsigned char b1, b2;
193 intnat val;
194 intnat idx = Long_val(index);
195 if (idx < 0 || idx + 1 >= caml_string_length(str)) caml_array_bound_error();
196 val = Long_val(newval);
197 #ifdef ARCH_BIG_ENDIAN
198 b1 = 0xFF & val >> 8;
199 b2 = 0xFF & val;
200 #else
201 b2 = 0xFF & val >> 8;
202 b1 = 0xFF & val;
203 #endif
204 Byte_u(str, idx) = b1;
205 Byte_u(str, idx + 1) = b2;
206 return Val_unit;
207 }
208
209 CAMLprim value caml_bytes_set32(value str, value index, value newval)
210 {
211 unsigned char b1, b2, b3, b4;
212 intnat val;
213 intnat idx = Long_val(index);
214 if (idx < 0 || idx + 3 >= caml_string_length(str)) caml_array_bound_error();
215 val = Int32_val(newval);
216 #ifdef ARCH_BIG_ENDIAN
217 b1 = 0xFF & val >> 24;
218 b2 = 0xFF & val >> 16;
219 b3 = 0xFF & val >> 8;
220 b4 = 0xFF & val;
221 #else
222 b4 = 0xFF & val >> 24;
223 b3 = 0xFF & val >> 16;
224 b2 = 0xFF & val >> 8;
225 b1 = 0xFF & val;
226 #endif
227 Byte_u(str, idx) = b1;
228 Byte_u(str, idx + 1) = b2;
229 Byte_u(str, idx + 2) = b3;
230 Byte_u(str, idx + 3) = b4;
231 return Val_unit;
232 }
233
234 CAMLprim value caml_bytes_set64(value str, value index, value newval)
235 {
236 unsigned char b1, b2, b3, b4, b5, b6, b7, b8;
237 int64_t val;
238 intnat idx = Long_val(index);
239 if (idx < 0 || idx + 7 >= caml_string_length(str)) caml_array_bound_error();
240 val = Int64_val(newval);
241 #ifdef ARCH_BIG_ENDIAN
242 b1 = 0xFF & val >> 56;
243 b2 = 0xFF & val >> 48;
244 b3 = 0xFF & val >> 40;
245 b4 = 0xFF & val >> 32;
246 b5 = 0xFF & val >> 24;
247 b6 = 0xFF & val >> 16;
248 b7 = 0xFF & val >> 8;
249 b8 = 0xFF & val;
250 #else
251 b8 = 0xFF & val >> 56;
252 b7 = 0xFF & val >> 48;
253 b6 = 0xFF & val >> 40;
254 b5 = 0xFF & val >> 32;
255 b4 = 0xFF & val >> 24;
256 b3 = 0xFF & val >> 16;
257 b2 = 0xFF & val >> 8;
258 b1 = 0xFF & val;
259 #endif
260 Byte_u(str, idx) = b1;
261 Byte_u(str, idx + 1) = b2;
262 Byte_u(str, idx + 2) = b3;
263 Byte_u(str, idx + 3) = b4;
264 Byte_u(str, idx + 4) = b5;
265 Byte_u(str, idx + 5) = b6;
266 Byte_u(str, idx + 6) = b7;
267 Byte_u(str, idx + 7) = b8;
268 return Val_unit;
269 }
270
271 CAMLprim value caml_string_equal(value s1, value s2)
272 {
273 mlsize_t sz1, sz2;
274 value * p1, * p2;
275
276 if (s1 == s2) return Val_true;
277 sz1 = Wosize_val(s1);
278 sz2 = Wosize_val(s2);
279 if (sz1 != sz2) return Val_false;
280 for(p1 = Op_val(s1), p2 = Op_val(s2); sz1 > 0; sz1--, p1++, p2++)
281 if (*p1 != *p2) return Val_false;
282 return Val_true;
283 }
284
285 CAMLprim value caml_bytes_equal(value s1, value s2)
286 {
287 return caml_string_equal(s1,s2);
288 }
289
290 CAMLprim value caml_string_notequal(value s1, value s2)
291 {
292 return Val_not(caml_string_equal(s1, s2));
293 }
294
295 CAMLprim value caml_bytes_notequal(value s1, value s2)
296 {
297 return caml_string_notequal(s1,s2);
298 }
299
300 CAMLprim value caml_string_compare(value s1, value s2)
301 {
302 mlsize_t len1, len2;
303 int res;
304
305 if (s1 == s2) return Val_int(0);
306 len1 = caml_string_length(s1);
307 len2 = caml_string_length(s2);
308 res = memcmp(String_val(s1), String_val(s2), len1 <= len2 ? len1 : len2);
309 if (res < 0) return Val_int(-1);
310 if (res > 0) return Val_int(1);
311 if (len1 < len2) return Val_int(-1);
312 if (len1 > len2) return Val_int(1);
313 return Val_int(0);
314 }
315
316 CAMLprim value caml_bytes_compare(value s1, value s2)
317 {
318 return caml_string_compare(s1,s2);
319 }
320
321 CAMLprim value caml_string_lessthan(value s1, value s2)
322 {
323 return caml_string_compare(s1, s2) < Val_int(0) ? Val_true : Val_false;
324 }
325
326 CAMLprim value caml_bytes_lessthan(value s1, value s2)
327 {
328 return caml_string_lessthan(s1,s2);
329 }
330
331
332 CAMLprim value caml_string_lessequal(value s1, value s2)
333 {
334 return caml_string_compare(s1, s2) <= Val_int(0) ? Val_true : Val_false;
335 }
336
337 CAMLprim value caml_bytes_lessequal(value s1, value s2)
338 {
339 return caml_string_lessequal(s1,s2);
340 }
341
342
343 CAMLprim value caml_string_greaterthan(value s1, value s2)
344 {
345 return caml_string_compare(s1, s2) > Val_int(0) ? Val_true : Val_false;
346 }
347
348 CAMLprim value caml_bytes_greaterthan(value s1, value s2)
349 {
350 return caml_string_greaterthan(s1,s2);
351 }
352
353 CAMLprim value caml_string_greaterequal(value s1, value s2)
354 {
355 return caml_string_compare(s1, s2) >= Val_int(0) ? Val_true : Val_false;
356 }
357
358 CAMLprim value caml_bytes_greaterequal(value s1, value s2)
359 {
360 return caml_string_greaterequal(s1,s2);
361 }
362
363 CAMLprim value caml_blit_bytes(value s1, value ofs1, value s2, value ofs2,
364 value n)
365 {
366 memmove(&Byte(s2, Long_val(ofs2)), &Byte(s1, Long_val(ofs1)), Long_val(n));
367 return Val_unit;
368 }
369
370 CAMLprim value caml_blit_string(value s1, value ofs1, value s2, value ofs2,
371 value n)
372 {
373 return caml_blit_bytes (s1, ofs1, s2, ofs2, n);
374 }
375
376 CAMLprim value caml_fill_bytes(value s, value offset, value len, value init)
377 {
378 memset(&Byte(s, Long_val(offset)), Int_val(init), Long_val(len));
379 return Val_unit;
380 }
381
382 /**
383 * [caml_fill_string] is deprecated, use [caml_fill_bytes] instead
384 */
385 CAMLprim value caml_fill_string(value s, value offset, value len, value init)
386 {
387 return caml_fill_bytes (s, offset, len, init);
388 }
389
390 CAMLexport value caml_alloc_sprintf(const char * format, ...)
391 {
392 va_list args;
393 char buf[128];
394 int n;
395 value res;
396
397 #if !defined(_WIN32) || defined(_UCRT)
398 /* C99-compliant implementation */
399 va_start(args, format);
400 /* "vsnprintf(dest, sz, format, args)" writes at most "sz" characters
401 into "dest", including the terminating '\0'.
402 It returns the number of characters of the formatted string,
403 excluding the terminating '\0'. */
404 n = vsnprintf(buf, sizeof(buf), format, args);
405 va_end(args);
406 if (n < sizeof(buf)) {
407 /* All output characters were written to buf, including the
408 terminating '\0'. Allocate a Caml string with length "n"
409 as computed by vsnprintf, and copy the output of vsnprintf into it. */
410 res = caml_alloc_initialized_string(n, buf);
411 } else {
412 /* PR#7568: if the format is in the Caml heap, the following
413 caml_alloc_string could move or free the format. To prevent
414 this, take a copy of the format outside the Caml heap. */
415 char * saved_format = caml_stat_strdup(format);
416 /* Allocate a Caml string with length "n" as computed by vsnprintf. */
417 res = caml_alloc_string(n);
418 /* Re-do the formatting, outputting directly in the Caml string.
419 Note that caml_alloc_string left room for a '\0' at position n,
420 so the size passed to vsnprintf is n+1. */
421 va_start(args, format);
422 vsnprintf((char *)String_val(res), n + 1, saved_format, args);
423 va_end(args);
424 caml_stat_free(saved_format);
425 }
426 return res;
427 #else
428 /* Implementation specific to the Microsoft CRT library */
429 va_start(args, format);
430 /* "_vsnprintf(dest, sz, format, args)" writes at most "sz" characters
431 into "dest". Let "len" be the number of characters of the formatted
432 string.
433 If "len" < "sz", a null terminator was appended, and "len" is returned.
434 If "len" == "sz", no null termination, and "len" is returned.
435 If "len" > "sz", a negative value is returned. */
436 n = _vsnprintf(buf, sizeof(buf), format, args);
437 va_end(args);
438 if (n >= 0 && n <= sizeof(buf)) {
439 /* All output characters were written to buf.
440 "n" is the actual length of the output.
441 Allocate a Caml string of length "n" and copy the characters into it. */
442 res = caml_alloc_string(n);
443 memcpy((char *)String_val(res), buf, n);
444 } else {
445 /* PR#7568: if the format is in the Caml heap, the following
446 caml_alloc_string could move or free the format. To prevent
447 this, take a copy of the format outside the Caml heap. */
448 char * saved_format = caml_stat_strdup(format);
449 /* Determine actual length of output, excluding final '\0' */
450 va_start(args, format);
451 n = _vscprintf(format, args);
452 va_end(args);
453 res = caml_alloc_string(n);
454 /* Re-do the formatting, outputting directly in the Caml string.
455 Note that caml_alloc_string left room for a '\0' at position n,
456 so the size passed to _vsnprintf is n+1. */
457 va_start(args, format);
458 _vsnprintf((char *)String_val(res), n + 1, saved_format, args);
459 va_end(args);
460 caml_stat_free(saved_format);
461 }
462 return res;
463 #endif
464 }
465
466 CAMLprim value caml_string_of_bytes(value bv)
467 {
468 return bv;
469 }
470
471 CAMLprim value caml_bytes_of_string(value bv)
472 {
473 return bv;
474 }
475