fpmas 1.6
random.h
Go to the documentation of this file.
1#ifndef FPMAS_RANDOM_H
2#define FPMAS_RANDOM_H
3
8#include "generator.h"
9#include "distribution.h"
10#include <numeric>
11
12namespace fpmas { namespace random {
16 extern const std::mt19937::result_type default_seed;
17
24
29 extern DistributedGenerator<> rd_choices;
30
47 template<typename T, typename Generator_t>
48 std::vector<T> local_choices(
49 const std::vector<T>& local_items, std::size_t n,
50 Generator_t& gen) {
52 0, local_items.size()-1);
53 std::vector<T> random_items(n);
54 for(std::size_t i = 0; i < n; i++)
55 random_items[i] = local_items[local_random_item(gen)];
56 return random_items;
57 }
58
68 template<typename T>
69 std::vector<T> local_choices(
70 const std::vector<T>& local_items, std::size_t n) {
71 return local_choices(local_items, n, rd_choices);
72 }
73
111 template<typename T>
112 std::vector<std::vector<T>> distributed_choices(
114 const std::vector<T>& local_items, std::size_t n) {
116 std::vector<std::size_t> item_counts
117 = int_mpi.allGather(local_items.size());
118
119 std::unordered_map<int, std::size_t> requests;
120
121 random::DiscreteDistribution<std::size_t> rd_process(item_counts);
122 for(std::size_t i = 0; i < n; i++)
123 requests[rd_process(rd_choices)]++;
124 requests = int_mpi.allToAll(requests);
125
126 std::unordered_map<int, std::vector<T>> random_items;
127 for(auto item : requests)
128 random_items[item.first]
129 = local_choices(local_items, item.second, rd_choices);
130
132
133 std::vector<std::vector<T>> results(comm.getSize());
134 for(auto item : item_mpi.allToAll(random_items))
135 results[item.first] = item.second;
136
137 return results;
138 }
139
168 template<typename T>
169 std::vector<T> split_choices(
171 const std::vector<T>& local_items, std::size_t n) {
172 // A deterministic generator that produces the same sequence on all
173 // processes
174 static mt19937_64 gen(seed);
175
177 std::vector<std::size_t> item_counts
178 = int_mpi.allGather(local_items.size());
179
180 random::DiscreteDistribution<std::size_t> rd_process(item_counts);
181
182 std::unordered_map<int, std::size_t> requests;
183
184 // Determines how many items will be selected from this process, so
185 // that the sum of local_items_count on all processes is equal to
186 // n.
187 std::size_t local_items_count = 0;
188 for(std::size_t i = 0; i < n; i++) {
189 // Same result on all processes
190 int process = rd_process(gen);
191 if(process == comm.getRank())
192 local_items_count++;
193 }
194
195 return local_choices(local_items, local_items_count);
196 }
214 template<typename T, typename Generator_t>
215 std::vector<T> local_sample(
216 const std::vector<T>& local_items, std::size_t n,
217 Generator_t& gen) {
218 std::vector<T> sample(std::min(local_items.size(), n));
219
220 for(std::size_t i = 0; i < sample.size(); i++)
221 sample[i] = local_items[i];
222
223
224 for(std::size_t i = sample.size(); i < local_items.size(); i++) {
226 std::size_t k = rd_index(gen);
227 if(k < sample.size())
228 sample[k] = local_items[i];
229 }
230 return sample;
231 }
232
242 template<typename T>
243 std::vector<T> local_sample(
244 const std::vector<T>& local_items, std::size_t n
245 ) {
246 return local_sample(local_items, n, rd_choices);
247 }
248
249
291 template<typename Index_t, typename Generator_t>
292 std::vector<Index_t> sample_indexes(
293 Index_t begin, Index_t end,
294 std::size_t n,
295 Generator_t& gen
296 ) {
297 // (process, local_index)
298 std::vector<Index_t> result_indexes(n);
299
300 /* Initializes the indexes reservoir */
301 Index_t p = begin;
302 for(std::size_t i = 0; i < n; i++) {
303 result_indexes[i] = p++;
304 }
305 /**************************************/
306
307 std::size_t i = n;
308 while(p != end) {
310 std::size_t k = random_k(gen);
311 if(k < n)
312 result_indexes[k] = p;
313 ++i;
314 ++p;
315 }
316
317 return result_indexes;
318 }
319
320
356 template<typename K>
357 class Index {
358 private:
359 const std::map<K, std::size_t>* item_counts;
360 typename std::map<K, std::size_t>::const_iterator it;
361 std::size_t _offset;
362
363 public:
364 Index() = default;
365
371 Index(const std::map<K, std::size_t>* item_counts)
372 : item_counts(item_counts) {
373 }
381 const std::map<K, std::size_t>* item_counts,
382 typename std::map<K, std::size_t>::const_iterator it
383 )
384 : item_counts(item_counts), it(it), _offset(0) {
385 }
386
392 const std::map<K, std::size_t>* item_counts,
393 K key,
394 std::size_t offset
395 ) :
396 item_counts(item_counts),
397 it(this->item_counts->find(key)),
398 _offset(offset) {
399 }
400
404 K key() const {
405 return it->first;
406 };
410 std::size_t offset() const {
411 return _offset;
412 }
413
418 static Index begin(const std::map<K, std::size_t>* item_counts);
419
423 static Index end(const std::map<K, std::size_t>* item_counts);
424
429
435
439 Index operator+(std::size_t n) const;
440
449 static std::size_t distance(
450 const Index& i1, const Index& i2);
451
458 bool operator==(const Index& index) const;
462 bool operator!=(const Index& index) const;
463
467 bool operator<(const Index& index) const;
468 };
469
470 template<typename K>
471 Index<K> Index<K>::begin(const std::map<K, std::size_t>* item_counts) {
472 Index<K> _begin(item_counts, item_counts->begin());
473 while(_begin.it != item_counts->end() && _begin.it->second == 0)
474 ++_begin.it;
475 return _begin;
476 }
477
478 template<typename K>
479 Index<K> Index<K>::end(const std::map<K, std::size_t>* item_counts) {
480 Index<K> _end(item_counts, item_counts->end());
481 return _end;
482 }
483
484 template<typename K>
486 if(it != item_counts->end()) {
487 if(_offset >= it->second-1) {
488 _offset=0;
489 ++it;
490 // Skips processes without items
491 while(it != item_counts->end() && it->second == 0)
492 ++it;
493 } else {
494 ++_offset;
495 }
496 }
497 return *this;
498 }
499
500 template<typename K>
502 Index<K> index = *this;
503 ++*this;
504 return index;
505 }
506
507 template<typename K>
508 Index<K> Index<K>::operator+(std::size_t n) const {
509 Index<K> index = *this;
510 // index.id->second <=> index.item_counts[index.key()]
511 if(index._offset+n < index.it->second) {
512 // Stay in current process, nothing to do
513 index._offset+=n;
514 } else {
515 // Need to go to next process
516 n -= index.it->second - index.offset();
517 index._offset = 0;
518 ++index.it;
519 while(n >= index.it->second) {
520 n-= index.it->second;
521 ++index.it;
522 }
523 index._offset = n;
524 }
525 return index;
526 }
527
528 template<typename K>
530 const Index<K>& i1, const Index<K>& i2) {
531 if(i1.it != i1.item_counts->end() && i2.it != i2.item_counts->end()) {
532 if(i1.key() == i2.key()) {
533 return i2.offset() - i1.offset();
534 } else {
535 std::size_t result = i1.it->second - i1.offset();
536 auto it = i1.it;
537 ++it;
538 while(it->first != i2.it->first) {
539 result+=it->second;
540 ++it;
541 }
542 result+=i2.offset();
543 return result;
544 }
545 }
546 if(i1.it == i1.item_counts->end() && i2.it == i2.item_counts->end())
547 return 0;
548 // Since i2 reachable from i1, only i2 can be end (if i1 is end and not
549 // i2, unexpected result)
550
551 std::size_t result = i1.it->second - i1.offset();
552 auto it = i1.it;
553 ++it;
554 while(it != i1.item_counts->end()) {
555 result+=it->second;
556 ++it;
557 }
558 return result;
559 }
560
561 template<typename K>
562 bool Index<K>::operator==(const Index<K>& index) const {
563 if(it == item_counts->end() && index.it == index.item_counts->end())
564 return true;
565 if(it != item_counts->end() && index.it != index.item_counts->end())
566 return key() == index.key() && offset() == index.offset();
567 // One is end and other is not
568 return false;
569 }
570
571 template<typename K>
572 bool Index<K>::operator!=(const Index<K>& index) const {
573 return !(*this==index);
574 }
575
576 template<typename K>
577 bool Index<K>::operator<(const Index<K>& index) const {
578 if(this->it == this->item_counts->end()
579 && index.it != index.item_counts->end())
580 // This is end, index is not
581 return false;
582 if(index.it == index.item_counts->end()
583 && this->it != this->item_counts->end())
584 // Index is end, this is not
585 return true;
586 if(index.it == index.item_counts->end()
587 && this->it == this->item_counts->end())
588 // Both end (ie equal)
589 return false;
590 // Index and this are not end
591 if(this->key() == index.key())
592 return this->offset() < index.offset();
593 return this->key() < index.key();
594 }
595
614
652 template<typename T>
653 std::vector<std::vector<T>> distributed_sample(
655 const std::vector<T> &local_items, std::size_t n
656 ) {
657
659 std::vector<std::size_t> item_counts
660 = int_mpi.allGather(local_items.size());
661 std::map<int, std::size_t> item_counts_map;
662 for(std::size_t i = 0; i < item_counts.size(); i++)
663 item_counts_map[i] = item_counts[i];
664
665 // index = (process, local_offset)
666 std::vector<DistributedIndex> result_indexes
668 DistributedIndex::begin(&item_counts_map),
669 DistributedIndex::end(&item_counts_map),
670 std::min(
671 n,
672 std::accumulate(item_counts.begin(), item_counts.end(), 0ul)
673 ), rd_choices);
674
675 // Requests required items
677 std::unordered_map<int, std::vector<std::size_t>> requests;
678 for(auto& item : result_indexes)
679 requests[item.key()].push_back(item.offset());
680 requests = request_mpi.allToAll(requests);
681
682 // Exchange items
684 std::unordered_map<int, std::vector<T>> items;
685 for(auto item : requests)
686 for(auto index : item.second)
687 items[item.first].push_back(local_items[index]);
688 items = item_mpi.allToAll(items);
689
690 // Result transformation
691 std::vector<std::vector<T>> results(comm.getSize());
692 for(auto item : items)
693 results[item.first] = item.second;
694 return results;
695 }
696
745 template<typename T>
746 std::vector<T> split_sample(
748 const std::vector<T> &local_items, std::size_t n
749 ) {
750 // A deterministic generator that produces the same sequence on all
751 // processes
752 static mt19937_64 gen(seed);
753
755 std::vector<std::size_t> item_counts
756 = int_mpi.allGather(local_items.size());
757 std::map<int, std::size_t> item_counts_map;
758 for(std::size_t i = 0; i < item_counts.size(); i++)
759 item_counts_map[i] = item_counts[i];
760
761 // index = (process, local_offset)
762 std::vector<DistributedIndex> result_indexes
764 DistributedIndex::begin(&item_counts_map),
765 DistributedIndex::end(&item_counts_map),
766 std::min(
767 n,
768 std::accumulate(item_counts.begin(), item_counts.end(), 0ul)
769 ), gen);
770
771 // Because gen is sequential, the same result_indexes list is built
772 // on all processes
773
774 std::vector<T> result;
775 for(auto& item : result_indexes)
776 if(item.key() == comm.getRank())
777 result.push_back(local_items[item.offset()]);
778 return result;
779 }
780}}
781#endif
Definition: communication.h:251
std::vector< T > allGather(const T &) override
Definition: communication.h:492
std::unordered_map< int, T > allToAll(std::unordered_map< int, T > export_map) override
Definition: communication.h:446
Definition: distribution.h:126
Definition: distribution.h:24
Definition: generator.h:113
UniformRandomBitGenerator< Generator_t >::result_type result_type
Definition: generator.h:120
Definition: random.h:357
Index operator+(std::size_t n) const
Definition: random.h:508
Index(const std::map< K, std::size_t > *item_counts)
Definition: random.h:371
static Index end(const std::map< K, std::size_t > *item_counts)
Definition: random.h:479
bool operator!=(const Index &index) const
Definition: random.h:572
std::size_t offset() const
Definition: random.h:410
static std::size_t distance(const Index &i1, const Index &i2)
Definition: random.h:529
Index(const std::map< K, std::size_t > *item_counts, K key, std::size_t offset)
Definition: random.h:391
K key() const
Definition: random.h:404
Index & operator++()
Definition: random.h:485
bool operator<(const Index &index) const
Definition: random.h:577
Index operator++(int)
Definition: random.h:501
Index(const std::map< K, std::size_t > *item_counts, typename std::map< K, std::size_t >::const_iterator it)
Definition: random.h:380
static Index begin(const std::map< K, std::size_t > *item_counts)
Definition: random.h:471
bool operator==(const Index &index) const
Definition: random.h:562
std::vector< std::vector< T > > distributed_sample(api::communication::MpiCommunicator &comm, const std::vector< T > &local_items, std::size_t n)
Definition: random.h:653
Index< int > DistributedIndex
Definition: random.h:613
DistributedGenerator rd_choices
Definition: random.cpp:4
const std::mt19937::result_type default_seed
Definition: random.cpp:5
std::vector< T > split_sample(api::communication::MpiCommunicator &comm, const std::vector< T > &local_items, std::size_t n)
Definition: random.h:746
std::vector< T > local_choices(const std::vector< T > &local_items, std::size_t n, Generator_t &gen)
Definition: random.h:48
std::vector< Index_t > sample_indexes(Index_t begin, Index_t end, std::size_t n, Generator_t &gen)
Definition: random.h:292
std::vector< T > split_choices(api::communication::MpiCommunicator &comm, const std::vector< T > &local_items, std::size_t n)
Definition: random.h:169
mt19937::result_type seed
Definition: random.cpp:6
std::vector< T > local_sample(const std::vector< T > &local_items, std::size_t n, Generator_t &gen)
Definition: random.h:215
std::vector< std::vector< T > > distributed_choices(api::communication::MpiCommunicator &comm, const std::vector< T > &local_items, std::size_t n)
Definition: random.h:112
Definition: fpmas.cpp:3
Definition: communication.h:585