souffle  2.0.2-371-g6315b36
StringUtil.h
Go to the documentation of this file.
1 /*
2  * Souffle - A Datalog Compiler
3  * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved
4  * Licensed under the Universal Permissive License v 1.0 as shown at:
5  * - https://opensource.org/licenses/UPL
6  * - <souffle root>/licenses/SOUFFLE-UPL.txt
7  */
8 
9 /************************************************************************
10  *
11  * @file StringUtil.h
12  *
13  * @brief Datalog project utilities
14  *
15  ***********************************************************************/
16 
17 #pragma once
18 
19 #include "souffle/RamTypes.h"
20 #include <algorithm>
21 #include <cctype>
22 #include <cstdlib>
23 #include <fstream>
24 #include <limits>
25 #include <sstream>
26 #include <stdexcept>
27 #include <string>
28 #include <type_traits>
29 #include <typeinfo>
30 #include <vector>
31 
32 namespace souffle {
33 
34 // Forward declaration
35 inline bool isPrefix(const std::string& prefix, const std::string& element);
36 
37 /**
38  * Converts a string to a RamSigned
39  *
40  * This procedure has similar behaviour to std::stoi/stoll.
41  *
42  * The procedure accepts prefixes 0b (if base = 2) and 0x (if base = 16)
43  * If base = 0, the procedure will try to infer the base from the prefix, if present.
44  */
46  const std::string& str, std::size_t* position = nullptr, const int base = 10) {
47  RamSigned val;
48 
49  if (base == 0) {
50  if (isPrefix("-0b", str) || isPrefix("0b", str)) {
51  return RamSignedFromString(str, position, 2);
52  } else if (isPrefix("-0x", str) || isPrefix("0x", str)) {
53  return RamSignedFromString(str, position, 16);
54  } else {
55  return RamSignedFromString(str, position);
56  }
57  }
58  std::string binaryNumber;
59  bool parsingBinary = base == 2;
60 
61  // stoi/stoll can't handle base 2 prefix by default.
62  if (parsingBinary) {
63  if (isPrefix("-0b", str)) {
64  binaryNumber = "-" + str.substr(3);
65  } else if (isPrefix("0b", str)) {
66  binaryNumber = str.substr(2);
67  }
68  }
69  const std::string& tmp = parsingBinary ? binaryNumber : str;
70 
71 #if RAM_DOMAIN_SIZE == 64
72  val = std::stoll(tmp, position, base);
73 #else
74  val = std::stoi(tmp, position, base);
75 #endif
76 
77  if (parsingBinary && position != nullptr) {
78  *position += 2;
79  }
80 
81  return val;
82 }
83 
84 /**
85  * Converts a string to a RamFloat
86  */
87 inline RamFloat RamFloatFromString(const std::string& str, std::size_t* position = nullptr) {
88  RamFloat val;
89 #if RAM_DOMAIN_SIZE == 64
90  val = std::stod(str, position);
91 #else
92  val = std::stof(str, position);
93 #endif
94  return static_cast<RamFloat>(val);
95 }
96 /**
97  * Converts a string to a RamUnsigned
98  *
99  * This procedure has similar behaviour to std::stoul/stoull.
100  *
101  * The procedure accepts prefixes 0b (if base = 2) and 0x (if base = 16)
102  * If base = 0, the procedure will try to infer the base from the prefix, if present.
103  */
105  const std::string& str, std::size_t* position = nullptr, const int base = 10) {
106  // Be default C++ (stoul) allows unsigned numbers starting with "-".
107  if (isPrefix("-", str)) {
108  throw std::invalid_argument("Unsigned number can't start with minus.");
109  }
110 
111  if (base == 0) {
112  if (isPrefix("0b", str)) {
113  return RamUnsignedFromString(str, position, 2);
114  } else if (isPrefix("0x", str)) {
115  return RamUnsignedFromString(str, position, 16);
116  } else {
117  return RamUnsignedFromString(str, position);
118  }
119  }
120 
121  // stoul/stoull can't handle binary prefix by default.
122  std::string binaryNumber;
123  bool parsingBinary = false;
124  if (base == 2 && isPrefix("0b", str)) {
125  binaryNumber = str.substr(2);
126  parsingBinary = true;
127  }
128  const std::string& tmp = parsingBinary ? binaryNumber : str;
129 
130  RamUnsigned val;
131 #if RAM_DOMAIN_SIZE == 64
132  val = std::stoull(tmp, position, base);
133 #else
134  val = std::stoul(tmp, position, base);
135 #endif
136 
137  if (parsingBinary && position != nullptr) {
138  *position += 2;
139  }
140 
141  // check if it's safe to cast (stoul returns unsigned long)
142  if (val > std::numeric_limits<RamUnsigned>::max()) {
143  throw std::invalid_argument("Unsigned number of of bounds");
144  }
145 
146  return static_cast<RamUnsigned>(val);
147 }
148 
149 /**
150  * Can a string be parsed as RamSigned.
151  *
152  * Souffle (parser, not fact file readers) accepts: hex, binary and base 10.
153  * Integer can be negative, in all 3 formats this means that it
154  * starts with minus (c++ default semantics).
155  */
156 inline bool canBeParsedAsRamSigned(const std::string& string) {
157  size_t charactersRead = 0;
158 
159  try {
160  RamSignedFromString(string, &charactersRead, 0);
161  } catch (...) {
162  return false;
163  }
164 
165  return charactersRead == string.size();
166 }
167 
168 /**
169  * Can a string be parsed as RamUnsigned.
170  *
171  * Souffle accepts: hex, binary and base 10.
172  */
173 inline bool canBeParsedAsRamUnsigned(const std::string& string) {
174  size_t charactersRead = 0;
175  try {
176  RamUnsignedFromString(string, &charactersRead, 0);
177  } catch (...) {
178  return false;
179  }
180  return charactersRead == string.size();
181 }
182 
183 /**
184  * Can a string be parsed as RamFloat.
185  */
186 inline bool canBeParsedAsRamFloat(const std::string& string) {
187  size_t charactersRead = 0;
188  try {
189  RamFloatFromString(string, &charactersRead);
190  } catch (...) {
191  return false;
192  }
193  return charactersRead == string.size();
194 }
195 
196 #if RAM_DOMAIN_SIZE == 64
197 inline RamDomain stord(const std::string& str, std::size_t* pos = nullptr, int base = 10) {
198  return static_cast<RamDomain>(std::stoull(str, pos, base));
199 }
200 #elif RAM_DOMAIN_SIZE == 32
201 inline RamDomain stord(const std::string& str, std::size_t* pos = nullptr, int base = 10) {
202  return static_cast<RamDomain>(std::stoul(str, pos, base));
203 }
204 #else
205 #error RAM Domain is neither 32bit nor 64bit
206 #endif
207 
/**
 * Check whether a string is a sequence of digits.
 *
 * @param str  NUL-terminated string to inspect; may be nullptr
 * @return false for nullptr or if any character is not a decimal digit;
 *         true otherwise (including for the empty string)
 */
inline bool isNumber(const char* str) {
    if (str == nullptr) {
        return false;
    }

    for (; *str != 0; ++str) {
        // isdigit has undefined behaviour for negative values other than EOF,
        // so plain (possibly signed) chars must go through unsigned char first.
        if (std::isdigit(static_cast<unsigned char>(*str)) == 0) {
            return false;
        }
    }
    return true;
}
224 
/**
 * String-to-string conversion: the trivial overload of toString.
 *
 * @return a reference to the very string that was passed in (no copy is made)
 */
inline const std::string& toString(const std::string& str) {
    return str;
}
231 
namespace detail {

/**
 * Primary template of the printability trait: by default a type is
 * considered not printable.
 */
template <typename T, typename filter = void>
struct is_printable : public std::false_type {};

/**
 * Specialization selected when `std::ostream << T` is a well-formed
 * expression; such types are reported as printable.
 */
template <typename T>
struct is_printable<T, decltype(void(std::declval<std::ostream&>() << std::declval<T>()))>
        : public std::true_type {};

}  // namespace detail
250 
251 /**
252  * A generic function converting arbitrary objects to strings by utilizing
253  * their print capability.
254  *
255  * This function is mainly intended for implementing test cases and debugging
256  * operations.
257  */
258 template <typename T>
259 typename std::enable_if<detail::is_printable<T>::value, std::string>::type toString(const T& value) {
260  // write value into stream and return result
261  std::stringstream ss;
262  ss << value;
263  return ss.str();
264 }
265 
266 /**
267  * A fallback for the to-string function in case an unprintable object is supposed
268  * to be printed.
269  */
270 template <typename T>
271 typename std::enable_if<!detail::is_printable<T>::value, std::string>::type toString(const T&) {
272  std::stringstream ss;
273  ss << "(print for type ";
274  ss << typeid(T).name();
275  ss << " not supported)";
276  return ss.str();
277 }
278 
279 // -------------------------------------------------------------------------------
280 // String Utils
281 // -------------------------------------------------------------------------------
282 
/**
 * Tells whether `element` starts with `prefix`.
 *
 * The empty string is a prefix of every string.
 */
inline bool isPrefix(const std::string& prefix, const std::string& element) {
    // A prefix longer than the element can never match.
    if (prefix.size() > element.size()) {
        return false;
    }
    return element.compare(0, prefix.size(), prefix) == 0;
}
300 
/**
 * Tells whether `value` finishes with the character sequence `ending`.
 *
 * The empty string is a valid ending of every string.
 */
inline bool endsWith(const std::string& value, const std::string& ending) {
    const std::size_t valueLen = value.size();
    const std::size_t endingLen = ending.size();
    if (endingLen > valueLen) {
        return false;
    }
    // Compare the tail of `value` against the whole of `ending`.
    return value.compare(valueLen - endingLen, endingLen, ending) == 0;
}
311 
/**
 * Splits a string at every occurrence of a delimiter character.
 *
 * Empty tokens between or before delimiters are kept, but a delimiter at the
 * very end of the input does not produce a trailing empty token, and an empty
 * input produces no tokens at all.
 */
inline std::vector<std::string> splitString(const std::string& str, char delimiter) {
    std::vector<std::string> tokens;
    std::size_t begin = 0;
    while (begin < str.size()) {
        const std::size_t end = str.find(delimiter, begin);
        if (end == std::string::npos) {
            // Last token: everything up to the end of the input.
            tokens.push_back(str.substr(begin));
            break;
        }
        tokens.push_back(str.substr(begin, end - begin));
        begin = end + 1;
    }
    return tokens;
}
324 
/**
 * Produces an escaped copy of the input in which backslashes, semicolons,
 * double-quotes, newlines and tabs are written as two-character escape
 * sequences (\\, \;, \", \n and \t respectively).
 *
 * All other characters are copied unchanged.
 */
inline std::string stringify(const std::string& input) {
    std::string escaped;
    escaped.reserve(input.size());

    for (const char ch : input) {
        switch (ch) {
            case '\\': escaped += "\\\\"; break;
            case ';': escaped += "\\;"; break;
            case '"': escaped += "\\\""; break;
            case '\n': escaped += "\\n"; break;
            case '\t': escaped += "\\t"; break;
            default: escaped += ch; break;
        }
    }
    return escaped;
}
363 
/**
 * Escapes double-quotes for embedding a string in JSON.
 *
 * Every double-quote character in the input is preceded by a backslash in the
 * result; all other characters are copied unchanged.
 */
inline std::string escapeJSONstring(const std::string& JSONstr) {
    std::string escaped;
    escaped.reserve(JSONstr.size());

    for (const char ch : JSONstr) {
        if (ch == '"') {
            escaped += '\\';
        }
        escaped += ch;
    }
    return escaped;
}
379 
/**
 * Turns an arbitrary string into a valid C++ identifier by replacing every
 * offending character with an underscore.
 *
 * The first character must be a letter; every other character must be a
 * letter, a digit or an underscore. Note that this does not ensure the
 * uniqueness of the identifiers returned.
 *
 * @param id  the candidate name (taken by value and modified in place)
 * @return the sanitised identifier, of the same length as the input
 */
inline std::string identifier(std::string id) {
    for (std::size_t i = 0; i < id.length(); i++) {
        // The <cctype> classifiers have undefined behaviour for negative values,
        // so plain (possibly signed) chars must go through unsigned char first.
        const auto ch = static_cast<unsigned char>(id[i]);
        const bool invalidLeading = (i == 0) && (std::isalpha(ch) == 0);
        const bool invalidAnywhere = (std::isalnum(ch) == 0) && id[i] != '_';
        if (invalidLeading || invalidAnywhere) {
            id[i] = '_';
        }
    }
    return id;
}
389 
390 // TODO (b-scholz): tidy up unescape/escape functions
391 
/**
 * Replaces every occurrence of `needle` in `inputString` with `replacement`.
 *
 * Occurrences are processed left to right and the inserted replacement text
 * is not rescanned. An empty needle matches nothing, so the input is returned
 * unchanged in that case.
 *
 * @param inputString  text to process
 * @param needle       sequence to search for
 * @param replacement  text substituted for each occurrence of `needle`
 * @return the processed copy of `inputString`
 */
inline std::string unescape(
        const std::string& inputString, const std::string& needle, const std::string& replacement) {
    std::string result = inputString;
    // find() reports a match for an empty needle at every position, which would
    // make the loop below insert the replacement forever.
    if (needle.empty()) {
        return result;
    }

    std::size_t pos = 0;
    while ((pos = result.find(needle, pos)) != std::string::npos) {
        result.replace(pos, needle.length(), replacement);
        // Continue after the inserted text so it is never matched again.
        pos += replacement.length();
    }
    return result;
}
402 
403 inline std::string unescape(const std::string& inputString) {
404  std::string unescaped = unescape(inputString, "\\\"", "\"");
405  unescaped = unescape(unescaped, "\\t", "\t");
406  unescaped = unescape(unescaped, "\\r", "\r");
407  unescaped = unescape(unescaped, "\\n", "\n");
408  return unescaped;
409 }
410 
/**
 * Replaces every occurrence of `needle` in `inputString` with `replacement`.
 *
 * Occurrences are processed left to right and the inserted replacement text
 * is not rescanned, so a replacement that contains the needle (e.g. escaping
 * a quote with backslash-quote) is safe. An empty needle matches nothing, so
 * the input is returned unchanged in that case.
 *
 * @param inputString  text to process
 * @param needle       sequence to search for
 * @param replacement  text substituted for each occurrence of `needle`
 * @return the processed copy of `inputString`
 */
inline std::string escape(
        const std::string& inputString, const std::string& needle, const std::string& replacement) {
    std::string result = inputString;
    // find() reports a match for an empty needle at every position, which would
    // make the loop below insert the replacement forever.
    if (needle.empty()) {
        return result;
    }

    std::size_t pos = 0;
    while ((pos = result.find(needle, pos)) != std::string::npos) {
        result.replace(pos, needle.length(), replacement);
        // Continue after the inserted text so it is never matched again.
        pos += replacement.length();
    }
    return result;
}
421 
422 inline std::string escape(const std::string& inputString) {
423  std::string escaped = escape(inputString, "\"", "\\\"");
424  escaped = escape(escaped, "\t", "\\t");
425  escaped = escape(escaped, "\r", "\\r");
426  escaped = escape(escaped, "\n", "\\n");
427  return escaped;
428 }
429 
430 } // end namespace souffle
souffle::RamUnsigned
uint32_t RamUnsigned
Definition: RamTypes.h:58
souffle::RamSignedFromString
RamSigned RamSignedFromString(const std::string &str, std::size_t *position=nullptr, const int base=10)
Converts a string to a RamSigned.
Definition: StringUtil.h:51
souffle::isPrefix
bool isPrefix(const std::string &prefix, const std::string &element)
Determine if one string is a prefix of another.
Definition: StringUtil.h:292
souffle::RamDomain
int32_t RamDomain
Definition: RamTypes.h:56
souffle::isNumber
bool isNumber(const char *str)
Check whether a string is a sequence of digits.
Definition: StringUtil.h:217
souffle::canBeParsedAsRamFloat
bool canBeParsedAsRamFloat(const std::string &string)
Can a string be parsed as RamFloat.
Definition: StringUtil.h:192
souffle::RamFloat
float RamFloat
Definition: RamTypes.h:60
base
T & base
Definition: Reader.h:60
str
const std::string & str
Definition: json11.h:662
souffle::toString
const std::string & toString(const std::string &str)
A generic function converting strings into strings (trivial case).
Definition: StringUtil.h:234
souffle::detail::is_printable
A type trait to check whether a given type is printable.
Definition: StringUtil.h:245
souffle::canBeParsedAsRamUnsigned
bool canBeParsedAsRamUnsigned(const std::string &string)
Can a string be parsed as RamUnsigned.
Definition: StringUtil.h:179
std
Definition: Brie.h:3053
souffle::canBeParsedAsRamSigned
bool canBeParsedAsRamSigned(const std::string &string)
Can a string be parsed as RamSigned.
Definition: StringUtil.h:162
RamTypes.h
souffle
Definition: AggregateOp.h:25
souffle::RamUnsignedFromString
RamUnsigned RamUnsignedFromString(const std::string &str, std::size_t *position=nullptr, const int base=10)
Converts a string to a RamUnsigned.
Definition: StringUtil.h:110
souffle::RamSigned
RamDomain RamSigned
Definition: RamTypes.h:57
souffle::RamFloatFromString
RamFloat RamFloatFromString(const std::string &str, std::size_t *position=nullptr)
Converts a string to a RamFloat.
Definition: StringUtil.h:93