souffle  2.0.2-371-g6315b36
Public Member Functions | Protected Member Functions | Protected Attributes
souffle::ReadStreamCSV Class Reference

#include <ReadStreamCSV.h>

Inheritance diagram for souffle::ReadStreamCSV:
Inheritance graph
Collaboration diagram for souffle::ReadStreamCSV:
Collaboration graph

Public Member Functions

 ReadStreamCSV (std::istream &file, const std::map< std::string, std::string > &rwOperation, SymbolTable &symbolTable, RecordTable &recordTable)
 
- Public Member Functions inherited from souffle::ReadStream
template<typename T >
void readAll (T &relation)
 
- Public Member Functions inherited from souffle::SerialisationStream< false >
virtual ~SerialisationStream ()=default
 

Protected Member Functions

std::map< int, int > getInputColumnMap (const std::map< std::string, std::string > &rwOperation, const unsigned arity_) const
 
std::string nextElement (const std::string &line, size_t &start, size_t &end)
 
Own< RamDomain[]> readNextTuple () override
 Read and return the next tuple. More...
 
RamUnsigned readRamUnsigned (const std::string &element, size_t &charactersRead)
 Read an unsigned element. More...
 
- Protected Member Functions inherited from souffle::ReadStream
void consumeChar (const std::string &str, char c, size_t &pos)
 Read past given character, consuming any preceding whitespace. More...
 
void consumeWhiteSpace (const std::string &str, size_t &pos)
 Advance position in the string until first non-whitespace character. More...
 
RamDomain readADT (const std::string &source, const std::string &adtName, size_t pos=0, size_t *charactersRead=nullptr)
 
std::string readAlphanumeric (const std::string &source, size_t &pos)
 Read the next alphanumeric sequence (corresponding to IDENT). More...
 
RamDomain readRecord (const std::string &source, const std::string &recordTypeName, size_t pos=0, size_t *charactersRead=nullptr)
 Read a record from a string. More...
 
 ReadStream (const std::map< std::string, std::string > &rwOperation, SymbolTable &symTab, RecordTable &recTab)
 
std::string readUntil (const std::string &source, const std::string stopChars, const size_t pos, size_t *charactersRead)
 
- Protected Member Functions inherited from souffle::SerialisationStream< false >
 SerialisationStream (RO< SymbolTable > &symTab, RO< RecordTable > &recTab, const std::map< std::string, std::string > &rwOperation)
 
 SerialisationStream (RO< SymbolTable > &symTab, RO< RecordTable > &recTab, Json types)
 
 SerialisationStream (RO< SymbolTable > &symTab, RO< RecordTable > &recTab, Json types, std::vector< std::string > relTypes, size_t auxArity=0)
 

Protected Attributes

const std::string delimiter
 
std::istream & file
 
std::map< int, int > inputMap
 
size_t lineNumber
 
- Protected Attributes inherited from souffle::SerialisationStream< false >
size_t arity
 
size_t auxiliaryArity
 
RO< RecordTable > & recordTable
 
RO< SymbolTable > & symbolTable
 
std::vector< std::string > typeAttributes
 
Json types
 

Additional Inherited Members

- Protected Types inherited from souffle::SerialisationStream< false >
using RO = std::conditional_t< readOnlyTables, const A, A >
 

Detailed Description

Definition at line 49 of file ReadStreamCSV.h.

Constructor & Destructor Documentation

◆ ReadStreamCSV()

souffle::ReadStreamCSV::ReadStreamCSV ( std::istream &  file,
const std::map< std::string, std::string > &  rwOperation,
SymbolTable &  symbolTable,
RecordTable &  recordTable 
)
inline

Definition at line 51 of file ReadStreamCSV.h.

51  {
52  while (inputMap.size() < arity) {
53  int size = static_cast<int>(inputMap.size());
54  inputMap[size] = size;
55  }
56  }
57 
58 protected:
59  /**
60  * Read and return the next tuple.

References souffle::SerialisationStream< false >::arity, inputMap, and TCB_SPAN_NAMESPACE_NAME::detail::size().

Here is the call graph for this function:

Member Function Documentation

◆ getInputColumnMap()

std::map<int, int> souffle::ReadStreamCSV::getInputColumnMap ( const std::map< std::string, std::string > &  rwOperation,
const unsigned  arity_ 
) const
inline protected

Definition at line 217 of file ReadStreamCSV.h.

218  {
219  std::istringstream iss(columnString);
220  std::string mapping;
221  int index = 0;
222  while (std::getline(iss, mapping, ':')) {
223  inputColumnMap[stoi(mapping)] = index++;
224  }
225  if (inputColumnMap.size() < arity_) {
226  throw std::invalid_argument("Invalid column set was given: <" + columnString + ">");
227  }
228  } else {
229  while (inputColumnMap.size() < arity_) {
230  int size = static_cast<int>(inputColumnMap.size());
231  inputColumnMap[size] = size;
232  }
233  }
234  return inputColumnMap;
235  }
236 
237  const std::string delimiter;
238  std::istream& file;
239  size_t lineNumber;

◆ nextElement()

std::string souffle::ReadStreamCSV::nextElement ( const std::string &  line,
size_t &  start,
size_t &  end 
)
inline protected

Definition at line 163 of file ReadStreamCSV.h.

163  {
164  int record_parens = 0;
165  size_t next_delimiter = line.find(delimiter, start);
166 
167  // Find first delimiter after the record.
168  while (end < std::min(next_delimiter, line.length()) || record_parens != 0) {
169  // Track the number of parenthesis.
170  if (line[end] == '[') {
171  ++record_parens;
172  } else if (line[end] == ']') {
173  --record_parens;
174  }
175 
176  // Check for unbalanced parenthesis.
177  if (record_parens < 0) {
178  break;
179  };
180 
181  ++end;
182 
183  // Find a next delimiter if the old one is invalid.
184  // But only if inside the unbalance parenthesis.
185  if (end == next_delimiter && record_parens != 0) {
186  next_delimiter = line.find(delimiter, end);
187  }
188  }
189 
190  // Handle the end-of-the-line case where parenthesis are unbalanced.
191  if (record_parens != 0) {
192  std::stringstream errorMessage;
193  errorMessage << "Unbalanced record parenthesis " << lineNumber << "; ";
194  throw std::invalid_argument(errorMessage.str());
195  }
196  } else {
197  end = std::min(line.find(delimiter, start), line.length());
198  }
199 
200  // Check for missing value.
201  if (start > end) {
202  std::stringstream errorMessage;
203  errorMessage << "Values missing in line " << lineNumber << "; ";
204  throw std::invalid_argument(errorMessage.str());
205  }
206 
207  element = line.substr(start, end - start);
208  start = end + delimiter.size();
209 
210  return element;
211  }
212 
213  std::map<int, int> getInputColumnMap(
214  const std::map<std::string, std::string>& rwOperation, const unsigned arity_) const {
215  std::string columnString = getOr(rwOperation, "columns", "");

References delimiter, and lineNumber.

◆ readNextTuple()

Own<RamDomain[]> souffle::ReadStreamCSV::readNextTuple ( )
inline override protected virtual

Read and return the next tuple.

Returns nullptr if no tuple was readable.

Returns
The next tuple, or nullptr if no tuple was readable.

Implements souffle::ReadStream.

Reimplemented in souffle::ReadFileCSV.

Definition at line 69 of file ReadStreamCSV.h.

72  {
73  return nullptr;
74  }
75  // Handle Windows line endings on non-Windows systems
76  if (!line.empty() && line.back() == '\r') {
77  line = line.substr(0, line.length() - 1);
78  }
79  ++lineNumber;
80 
81  size_t start = 0;
82  size_t end = 0;
83  size_t columnsFilled = 0;
84  for (uint32_t column = 0; columnsFilled < arity; column++) {
85  size_t charactersRead = 0;
86  std::string element = nextElement(line, start, end);
87  if (inputMap.count(column) == 0) {
88  continue;
89  }
90  ++columnsFilled;
91 
92  try {
93  auto&& ty = typeAttributes.at(inputMap[column]);
94  switch (ty[0]) {
95  case 's': {
96  tuple[inputMap[column]] = symbolTable.unsafeLookup(element);
97  charactersRead = element.size();
98  break;
99  }
100  case 'r': {
101  tuple[inputMap[column]] = readRecord(element, ty, 0, &charactersRead);
102  break;
103  }
104  case '+': {
105  tuple[inputMap[column]] = readADT(element, ty, 0, &charactersRead);
106  break;
107  }
108  case 'i': {
109  tuple[inputMap[column]] = RamSignedFromString(element, &charactersRead);
110  break;
111  }
112  case 'u': {
113  tuple[inputMap[column]] = ramBitCast(readRamUnsigned(element, charactersRead));
114  break;
115  }
116  case 'f': {
117  tuple[inputMap[column]] = ramBitCast(RamFloatFromString(element, &charactersRead));
118  break;
119  }
120  default: fatal("invalid type attribute: `%c`", ty[0]);
121  }
122  // Check if everything was read.
123  if (charactersRead != element.size()) {
124  throw std::invalid_argument(
125  "Expected: " + delimiter + " or \\n. Got: " + element[charactersRead]);
126  }
127  } catch (...) {
128  std::stringstream errorMessage;
129  errorMessage << "Error converting <" + element + "> in column " << column + 1 << " in line "
130  << lineNumber << "; ";
131  throw std::invalid_argument(errorMessage.str());
132  }
133  }
134 
135  return tuple;
136  }
137 
138  /**
139  * Read an unsigned element. Possible bases are 2, 10, 16
140  * Base is indicated by the first two chars.

◆ readRamUnsigned()

RamUnsigned souffle::ReadStreamCSV::readRamUnsigned ( const std::string &  element,
size_t &  charactersRead 
)
inline protected

Read an unsigned element.

Possible bases are 2, 10, and 16. The base is indicated by the first two characters.

Definition at line 146 of file ReadStreamCSV.h.

149  {
150  value = RamUnsignedFromString(element, &charactersRead, 2);
151  } else if (isPrefix("0x", element)) {
152  value = RamUnsignedFromString(element, &charactersRead, 16);
153  } else {
154  value = RamUnsignedFromString(element, &charactersRead);
155  }
156  return value;
157  }
158 
159  std::string nextElement(const std::string& line, size_t& start, size_t& end) {
160  std::string element;
161 

References souffle::RamUnsignedFromString().

Here is the call graph for this function:

Field Documentation

◆ delimiter

const std::string souffle::ReadStreamCSV::delimiter
protected

Definition at line 241 of file ReadStreamCSV.h.

Referenced by nextElement().

◆ file

std::istream& souffle::ReadStreamCSV::file
protected

Definition at line 242 of file ReadStreamCSV.h.

Referenced by souffle::ReadFileCSV::ReadFileCSV().

◆ inputMap

std::map<int, int> souffle::ReadStreamCSV::inputMap
protected

Definition at line 244 of file ReadStreamCSV.h.

Referenced by ReadStreamCSV().

◆ lineNumber

size_t souffle::ReadStreamCSV::lineNumber
protected

Definition at line 243 of file ReadStreamCSV.h.

Referenced by nextElement().


The documentation for this class was generated from the following file:
ReadStreamCSV.h
souffle::RamSignedFromString
RamSigned RamSignedFromString(const std::string &str, std::size_t *position=nullptr, const int base=10)
Converts a string to a RamSigned.
Definition: StringUtil.h:51
TCB_SPAN_NAMESPACE_NAME::detail::size
constexpr auto size(const C &c) -> decltype(c.size())
Definition: span.h:198
souffle::ReadStreamCSV::readRamUnsigned
RamUnsigned readRamUnsigned(const std::string &element, size_t &charactersRead)
Read an unsigned element.
Definition: ReadStreamCSV.h:146
souffle::isPrefix
bool isPrefix(const std::string &prefix, const std::string &element)
Determine if one string is a prefix of another.
Definition: StringUtil.h:292
souffle::SerialisationStream< false >::symbolTable
RO< SymbolTable > & symbolTable
Definition: SerialisationStream.h:71
souffle::SerialisationStream< false >::typeAttributes
std::vector< std::string > typeAttributes
Definition: SerialisationStream.h:74
souffle::getOr
C::mapped_type const & getOr(const C &container, typename C::key_type key, const typename C::mapped_type &defaultValue)
Get value for a given key; if not found, return default value.
Definition: ContainerUtil.h:111
souffle::ReadStreamCSV::nextElement
std::string nextElement(const std::string &line, size_t &start, size_t &end)
Definition: ReadStreamCSV.h:163
souffle::ReadStreamCSV::getInputColumnMap
std::map< int, int > getInputColumnMap(const std::map< std::string, std::string > &rwOperation, const unsigned arity_) const
Definition: ReadStreamCSV.h:217
souffle::fatal
void fatal(const char *format, const Args &... args)
Definition: MiscUtil.h:198
souffle::RamUnsignedFromString
RamUnsigned RamUnsignedFromString(const std::string &str, std::size_t *position=nullptr, const int base=10)
Converts a string to a RamUnsigned.
Definition: StringUtil.h:110
souffle::SerialisationStream< false >::arity
size_t arity
Definition: SerialisationStream.h:76
souffle::ramBitCast
To ramBitCast(From source)
In C++20 there will be a new way to cast between types by reinterpreting bits (std::bit_cast),...
Definition: RamTypes.h:87
souffle::ReadStreamCSV::file
std::istream & file
Definition: ReadStreamCSV.h:242
souffle::ReadStreamCSV::lineNumber
size_t lineNumber
Definition: ReadStreamCSV.h:243
souffle::RamFloatFromString
RamFloat RamFloatFromString(const std::string &str, std::size_t *position=nullptr)
Converts a string to a RamFloat.
Definition: StringUtil.h:93
souffle::ReadStream::readADT
RamDomain readADT(const std::string &source, const std::string &adtName, size_t pos=0, size_t *charactersRead=nullptr)
Definition: ReadStream.h:143
souffle::ReadStream::readRecord
RamDomain readRecord(const std::string &source, const std::string &recordTypeName, size_t pos=0, size_t *charactersRead=nullptr)
Read a record from a string.
Definition: ReadStream.h:71
souffle::ReadStreamCSV::delimiter
const std::string delimiter
Definition: ReadStreamCSV.h:241
souffle::ReadStreamCSV::inputMap
std::map< int, int > inputMap
Definition: ReadStreamCSV.h:244