souffle  2.0.2-371-g6315b36
WriteStreamCSV.h
Go to the documentation of this file.
1 /*
2  * Souffle - A Datalog Compiler
3  * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved
4  * Licensed under the Universal Permissive License v 1.0 as shown at:
5  * - https://opensource.org/licenses/UPL
6  * - <souffle root>/licenses/SOUFFLE-UPL.txt
7  */
8 
9 /************************************************************************
10  *
11  * @file WriteStreamCSV.h
12  *
13  ***********************************************************************/
14 
15 #pragma once
16 
17 #include "souffle/RamTypes.h"
18 #include "souffle/SymbolTable.h"
19 #include "souffle/io/WriteStream.h"
23 #ifdef USE_LIBZ
24 #include "souffle/io/gzfstream.h"
25 #endif
26 
27 #include <cstddef>
28 #include <fstream>
29 #include <iomanip>
30 #include <iostream>
31 #include <map>
32 #include <ostream>
33 #include <string>
34 #include <vector>
35 
36 namespace souffle {
37 
38 class RecordTable;
39 
40 class WriteStreamCSV : public WriteStream {
41 protected:
42  WriteStreamCSV(const std::map<std::string, std::string>& rwOperation, const SymbolTable& symbolTable,
43  const RecordTable& recordTable)
45  delimiter(getOr(rwOperation, "delimiter", "\t")){};
46 
47  const std::string delimiter;
48 
49  void writeNextTupleCSV(std::ostream& destination, const RamDomain* tuple) {
50  writeNextTupleElement(destination, typeAttributes.at(0), tuple[0]);
51 
52  for (size_t col = 1; col < arity; ++col) {
53  destination << delimiter;
54  writeNextTupleElement(destination, typeAttributes.at(col), tuple[col]);
55  }
56 
57  destination << "\n";
58  }
59 
60  void writeNextTupleElement(std::ostream& destination, const std::string& type, RamDomain value) {
61  switch (type[0]) {
62  case 's': destination << symbolTable.unsafeResolve(value); break;
63  case 'i': destination << value; break;
64  case 'u': destination << ramBitCast<RamUnsigned>(value); break;
65  case 'f': destination << ramBitCast<RamFloat>(value); break;
66  case 'r': outputRecord(destination, value, type); break;
67  case '+': outputADT(destination, value, type); break;
68  default: fatal("unsupported type attribute: `%c`", type[0]);
69  }
70  }
71 };
72 
73 class WriteFileCSV : public WriteStreamCSV {
74 public:
75  WriteFileCSV(const std::map<std::string, std::string>& rwOperation, const SymbolTable& symbolTable,
76  const RecordTable& recordTable)
78  file(getFileName(rwOperation), std::ios::out | std::ios::binary) {
79  if (getOr(rwOperation, "headers", "false") == "true") {
80  file << rwOperation.at("attributeNames") << std::endl;
81  }
82  file << std::setprecision(std::numeric_limits<RamFloat>::max_digits10);
83  }
84 
85  ~WriteFileCSV() override = default;
86 
87 protected:
88  std::ofstream file;
89 
90  void writeNullary() override {
91  file << "()\n";
92  }
93 
94  void writeNextTuple(const RamDomain* tuple) override {
96  }
97 
98  /**
99  * Return given filename or construct from relation name.
100  * Default name is [configured path]/[relation name].csv
101  *
102  * @param rwOperation map of IO configuration options
103  * @return input filename
104  */
105  static std::string getFileName(const std::map<std::string, std::string>& rwOperation) {
106  auto name = getOr(rwOperation, "filename", rwOperation.at("name") + ".csv");
107  if (name.front() != '/') {
108  name = getOr(rwOperation, "output-dir", ".") + "/" + name;
109  }
110  return name;
111  }
112 };
113 
114 #ifdef USE_LIBZ
115 class WriteGZipFileCSV : public WriteStreamCSV {
116 public:
117  WriteGZipFileCSV(const std::map<std::string, std::string>& rwOperation, const SymbolTable& symbolTable,
118  const RecordTable& recordTable)
119  : WriteStreamCSV(rwOperation, symbolTable, recordTable),
120  file(getFileName(rwOperation), std::ios::out | std::ios::binary) {
121  if (getOr(rwOperation, "headers", "false") == "true") {
122  file << rwOperation.at("attributeNames") << std::endl;
123  }
124  file << std::setprecision(std::numeric_limits<RamFloat>::max_digits10);
125  }
126 
127  ~WriteGZipFileCSV() override = default;
128 
129 protected:
130  void writeNullary() override {
131  file << "()\n";
132  }
133 
134  void writeNextTuple(const RamDomain* tuple) override {
135  writeNextTupleCSV(file, tuple);
136  }
137 
138  /**
139  * Return given filename or construct from relation name.
140  * Default name is [configured path]/[relation name].csv
141  *
142  * @param rwOperation map of IO configuration options
143  * @return input filename
144  */
145  static std::string getFileName(const std::map<std::string, std::string>& rwOperation) {
146  auto name = getOr(rwOperation, "filename", rwOperation.at("name") + ".csv.gz");
147  if (name.front() != '/') {
148  name = getOr(rwOperation, "output-dir", ".") + "/" + name;
149  }
150  return name;
151  }
152 
153  gzfstream::ogzfstream file;
154 };
155 #endif
156 
157 class WriteCoutCSV : public WriteStreamCSV {
158 public:
159  WriteCoutCSV(const std::map<std::string, std::string>& rwOperation, const SymbolTable& symbolTable,
160  const RecordTable& recordTable)
162  std::cout << "---------------\n" << rwOperation.at("name");
163  if (getOr(rwOperation, "headers", "false") == "true") {
164  std::cout << "\n" << rwOperation.at("attributeNames");
165  }
166  std::cout << "\n===============\n";
167  std::cout << std::setprecision(std::numeric_limits<RamFloat>::max_digits10);
168  }
169 
170  ~WriteCoutCSV() override {
171  std::cout << "===============\n";
172  }
173 
174 protected:
175  void writeNullary() override {
176  std::cout << "()\n";
177  }
178 
179  void writeNextTuple(const RamDomain* tuple) override {
180  writeNextTupleCSV(std::cout, tuple);
181  }
182 };
183 
184 class WriteCoutPrintSize : public WriteStream {
185 public:
186  explicit WriteCoutPrintSize(const std::map<std::string, std::string>& rwOperation)
187  : WriteStream(rwOperation, {}, {}), lease(souffle::getOutputLock().acquire()) {
188  std::cout << rwOperation.at("name") << "\t";
189  }
190 
191  ~WriteCoutPrintSize() override = default;
192 
193 protected:
194  void writeNullary() override {
195  fatal("attempting to iterate over a print size operation");
196  }
197 
198  void writeNextTuple(const RamDomain* /* tuple */) override {
199  fatal("attempting to iterate over a print size operation");
200  }
201 
202  void writeSize(std::size_t size) override {
203  std::cout << size << "\n";
204  }
205 
207 };
208 
210 public:
211  Own<WriteStream> getWriter(const std::map<std::string, std::string>& rwOperation,
212  const SymbolTable& symbolTable, const RecordTable& recordTable) override {
213 #ifdef USE_LIBZ
214  if (contains(rwOperation, "compress")) {
215  return mk<WriteGZipFileCSV>(rwOperation, symbolTable, recordTable);
216  }
217 #endif
218  return mk<WriteFileCSV>(rwOperation, symbolTable, recordTable);
219  }
220  const std::string& getName() const override {
221  static const std::string name = "file";
222  return name;
223  }
224  ~WriteFileCSVFactory() override = default;
225 };
226 
228 public:
229  Own<WriteStream> getWriter(const std::map<std::string, std::string>& rwOperation,
230  const SymbolTable& symbolTable, const RecordTable& recordTable) override {
231  return mk<WriteCoutCSV>(rwOperation, symbolTable, recordTable);
232  }
233 
234  const std::string& getName() const override {
235  static const std::string name = "stdout";
236  return name;
237  }
238  ~WriteCoutCSVFactory() override = default;
239 };
240 
242 public:
243  Own<WriteStream> getWriter(const std::map<std::string, std::string>& rwOperation, const SymbolTable&,
244  const RecordTable&) override {
245  return mk<WriteCoutPrintSize>(rwOperation);
246  }
247  const std::string& getName() const override {
248  static const std::string name = "stdoutprintsize";
249  return name;
250  }
251  ~WriteCoutPrintSizeFactory() override = default;
252 };
253 
254 } /* namespace souffle */
souffle::WriteStreamCSV::WriteStreamCSV
WriteStreamCSV(const std::map< std::string, std::string > &rwOperation, const SymbolTable &symbolTable, const RecordTable &recordTable)
Definition: WriteStreamCSV.h:46
TCB_SPAN_NAMESPACE_NAME::detail::size
constexpr auto size(const C &c) -> decltype(c.size())
Definition: span.h:198
souffle::SerialisationStream< true >::recordTable
RO< RecordTable > & recordTable
Definition: SerialisationStream.h:72
souffle::WriteCoutCSV::~WriteCoutCSV
~WriteCoutCSV() override
Definition: WriteStreamCSV.h:174
souffle::WriteCoutCSV::writeNextTuple
void writeNextTuple(const RamDomain *tuple) override
Definition: WriteStreamCSV.h:183
souffle::WriteCoutCSVFactory::getName
const std::string & getName() const override
Definition: WriteStreamCSV.h:238
souffle::RamDomain
int32_t RamDomain
Definition: RamTypes.h:56
souffle::WriteFileCSV::writeNextTuple
void writeNextTuple(const RamDomain *tuple) override
Definition: WriteStreamCSV.h:98
souffle::WriteCoutPrintSizeFactory
Definition: WriteStreamCSV.h:245
SymbolTable.h
souffle::WriteCoutPrintSizeFactory::getWriter
Own< WriteStream > getWriter(const std::map< std::string, std::string > &rwOperation, const SymbolTable &, const RecordTable &) override
Definition: WriteStreamCSV.h:247
ParallelUtil.h
souffle::WriteFileCSVFactory::~WriteFileCSVFactory
~WriteFileCSVFactory() override=default
souffle::RecordTable
Definition: RecordTable.h:114
souffle::contains
bool contains(const C &container, const typename C::value_type &element)
A utility to check generically whether a given element is contained in a given container.
Definition: ContainerUtil.h:75
souffle::WriteFileCSV::~WriteFileCSV
~WriteFileCSV() override=default
souffle::Own
std::unique_ptr< A > Own
Definition: ContainerUtil.h:42
souffle::WriteStream::outputADT
void outputADT(std::ostream &destination, const RamDomain value, const std::string &name)
Definition: WriteStream.h:124
MiscUtil.h
souffle::WriteStream
Definition: WriteStream.h:38
souffle::WriteStream::WriteStream
WriteStream(const std::map< std::string, std::string > &rwOperation, const SymbolTable &symbolTable, const RecordTable &recordTable)
Definition: WriteStream.h:40
gzfstream.h
souffle::WriteFileCSV::writeNullary
void writeNullary() override
Definition: WriteStreamCSV.h:94
souffle::SerialisationStream< true >::symbolTable
RO< SymbolTable > & symbolTable
Definition: SerialisationStream.h:71
souffle::SerialisationStream< true >::typeAttributes
std::vector< std::string > typeAttributes
Definition: SerialisationStream.h:74
souffle::WriteCoutCSVFactory
Definition: WriteStreamCSV.h:231
souffle::WriteStreamCSV::writeNextTupleCSV
void writeNextTupleCSV(std::ostream &destination, const RamDomain *tuple)
Definition: WriteStreamCSV.h:53
souffle::WriteCoutPrintSize::WriteCoutPrintSize
WriteCoutPrintSize(const std::map< std::string, std::string > &rwOperation)
Definition: WriteStreamCSV.h:190
souffle::getOr
C::mapped_type const & getOr(const C &container, typename C::key_type key, const typename C::mapped_type &defaultValue)
Get value for a given key; if not found, return default value.
Definition: ContainerUtil.h:111
souffle::WriteCoutCSV::WriteCoutCSV
WriteCoutCSV(const std::map< std::string, std::string > &rwOperation, const SymbolTable &symbolTable, const RecordTable &recordTable)
Definition: WriteStreamCSV.h:163
souffle::WriteStream::outputRecord
void outputRecord(std::ostream &destination, const RamDomain value, const std::string &name)
Definition: WriteStream.h:83
ContainerUtil.h
souffle::WriteCoutPrintSize::writeNullary
void writeNullary() override
Definition: WriteStreamCSV.h:198
souffle::WriteCoutPrintSize
Definition: WriteStreamCSV.h:188
souffle::Lock::Lease
Definition: ParallelUtil.h:468
souffle::SymbolTable
Definition: SymbolTable.h:48
souffle::WriteFileCSVFactory::getWriter
Own< WriteStream > getWriter(const std::map< std::string, std::string > &rwOperation, const SymbolTable &symbolTable, const RecordTable &recordTable) override
Definition: WriteStreamCSV.h:215
souffle::WriteFileCSV::getFileName
static std::string getFileName(const std::map< std::string, std::string > &rwOperation)
Return given filename or construct from relation name.
Definition: WriteStreamCSV.h:109
souffle::WriteCoutPrintSize::lease
Lock::Lease lease
Definition: WriteStreamCSV.h:210
souffle::WriteFileCSVFactory::getName
const std::string & getName() const override
Definition: WriteStreamCSV.h:224
souffle::WriteCoutPrintSizeFactory::~WriteCoutPrintSizeFactory
~WriteCoutPrintSizeFactory() override=default
souffle::WriteCoutCSV::writeNullary
void writeNullary() override
Definition: WriteStreamCSV.h:179
WriteStream.h
souffle::WriteFileCSVFactory
Definition: WriteStreamCSV.h:213
souffle::WriteCoutPrintSize::writeNextTuple
void writeNextTuple(const RamDomain *) override
Definition: WriteStreamCSV.h:202
std
Definition: Brie.h:3053
RamTypes.h
souffle::WriteFileCSV::file
std::ofstream file
Definition: WriteStreamCSV.h:92
souffle::fatal
void fatal(const char *format, const Args &... args)
Definition: MiscUtil.h:198
souffle::WriteCoutPrintSize::~WriteCoutPrintSize
~WriteCoutPrintSize() override=default
souffle::getOutputLock
Lock & getOutputLock()
Obtains a reference to the lock synchronizing output operations.
Definition: ParallelUtil.h:568
souffle
Definition: AggregateOp.h:25
souffle::SerialisationStream< true >::arity
size_t arity
Definition: SerialisationStream.h:76
souffle::WriteCoutPrintSizeFactory::getName
const std::string & getName() const override
Definition: WriteStreamCSV.h:251
souffle::WriteCoutPrintSize::writeSize
void writeSize(std::size_t size) override
Definition: WriteStreamCSV.h:206
souffle::WriteStreamFactory
Definition: WriteStream.h:193
souffle::WriteStreamCSV::delimiter
const std::string delimiter
Definition: WriteStreamCSV.h:49
souffle::WriteCoutCSVFactory::getWriter
Own< WriteStream > getWriter(const std::map< std::string, std::string > &rwOperation, const SymbolTable &symbolTable, const RecordTable &recordTable) override
Definition: WriteStreamCSV.h:233
souffle::WriteStreamCSV
Definition: WriteStreamCSV.h:44
souffle::tuple
Defines a tuple for the OO interface such that relations with varying columns can be accessed.
Definition: SouffleInterface.h:443
souffle::WriteCoutCSVFactory::~WriteCoutCSVFactory
~WriteCoutCSVFactory() override=default
souffle::WriteStreamCSV::writeNextTupleElement
void writeNextTupleElement(std::ostream &destination, const std::string &type, RamDomain value)
Definition: WriteStreamCSV.h:64
souffle::WriteFileCSV::WriteFileCSV
WriteFileCSV(const std::map< std::string, std::string > &rwOperation, const SymbolTable &symbolTable, const RecordTable &recordTable)
Definition: WriteStreamCSV.h:79
std::type
ElementType type
Definition: span.h:640