nimforum mirror - porting c++ [string]view demo to nim: a puzzle

ggibson (orginal) [2022-02-20T15:43:47+01:00] view original

A friend was very excited about c++ string_view, which is now available since c++17, and they shared this demo with me where compiling without string_view will yield a runtime of several seconds, and with string_view the runtime is 30-60ms. We often chat about nim so I offered to code up the nim equivalent. I was hoping to use {.experimental: "views".} but couldn't see how to do it, so instead made my own string_view type. To me, this means the answer for doing it in nim is "you have to roll your own." But maybe you can find a way to use views to do this? And I'll learn something :p

Below is the original c++ version followed by my nim version with a shoddy string_view implementation. The demo is just a text corpus to CountTable looking for particular sentences. The demo originally used the Project Gutenberg Encyclopedia, but any suitable corpus should work.

For c++ and nim demos below, compile with or without string_view defined to toggle support.

Caveat: Obviously, you could rewrite the algorithm, but the goal here is to make as few changes as possible and retain the original reading of the algorithm while achieving speedup using views.

Without string_view: g++ -O3 -o book_reader book_reader.cpp

With string_view: g++ -O3 -Dstringview -o book_reader book_reader.cpp

#include<iostream>
#include<fstream>
#include<vector>
#include<string>
#include<string_view>
#include<map>
#include<chrono>
using namespace std; // sorry, I find this more readable

int main() {
    string line,Book;
    ifstream f("pg200.txt");
    while(getline(f,line))
        Book += line + " ";
    f.close();

#ifdef stringview
    auto book = string_view(Book);
    vector<string_view> sentences;
    map<string_view,long> counts;
#else
    auto book = Book;
    vector<string> sentences;
    map<string,long> counts;
#endif
    
    auto start = chrono::system_clock::now();
    auto i = book.find("The ");
    while(i!=string::npos) {
        auto p = book.find_first_of('.',i);
        sentences.push_back(book.substr(i,p-i+1));
        book = book.substr(p);
        i = book.find("The ");
    }
    
    for (auto &se : sentences) {
        auto s = se;
        auto i = s.find(" ");
        while(i!=string::npos) {
            counts[s.substr(0,i)]++;
            s = s.substr(i+1);
            i = s.find(" ");
        }
    }
    auto end = chrono::system_clock::now();
    
    cout << "processing took " << chrono::duration_cast<chrono::milliseconds>(end-start).count() << " ms" << endl;
}

Without string_view: nim c -d:danger --gc:arc nimbook_reader.nim

With string_view: nim c -d:stringview -d:danger --gc:arc nimbook_reader.nim

import times
import tables
import strutils
import hashes

## my good-enough incomplete string_view
type
    string_view = object
      src: ptr string
      a,b: int
      hsh: Hash
proc initString_view(s:var string):string_view =
    result.src = addr s
    result.a = 0
    result.b = s.len - 1
    result.hsh = hash(s)
proc hash(sv:string_view):Hash =
    result = sv.hsh !& hash(sv.a) !& hash(sv.b)
    result = !$ result
template `$`(sv:string_view):untyped =
  sv.src[][sv.a..sv.b]
template substr(sv:string_view, start,last:int):untyped =
  string_view(src:sv.src,a:sv.a+start,b:sv.a+last)
template substr(sv:string_view, start:int):untyped =
  string_view(src:sv.src,a:sv.a+start,b:sv.b)
template find(sv:string_view, sub:char|string, start:Natural=0):untyped =
  sv.src[].find(sub, start+sv.a, sv.b)-sv.a
#template find(sv:string_view, sub:char|string, start:Natural, last:Natural):untyped =
#    sv.src[].find(sub, start+sv.a, last+sv.a)-sv.a

proc main() =
    var bookBuilder = ""
    for line in lines "pg200.txt":
        bookBuilder.add line & " "
    
    when defined(stringview):
        var sentences = newSeq[string_view]()
        var counts = newCountTable[string_view]()
        var book = initString_view(bookBuilder)
    else:
        var sentences = newSeq[string]()
        var counts = newCountTable[string]()
        var book = bookBuilder
    
    let start = cpuTime()
    var i = book.find("The ")
    while i >= 0:
        var p = book.find('.', start=i)
        sentences.add book.substr(i,p)
        book = book.substr(p)
        i = book.find("The ")
    
    for s in sentences:
        var frag = s
        var i = frag.find(" ")
        while i >= 0:
            counts.inc(frag.substr(0,i))
            frag = frag.substr(i+1)
            i = frag.find(" ")
    let stop = cpuTime()
    
    echo "processing took ", (stop-start)*1_000, " ms"

when isMainModule: main()

Mirror of forum.nim-lang.org

8935 :: porting c++ [string]view demo to nim: a puzzle