Add total ratings for all Candidates (STAR Voting). In future this is going to be read from a file...
# Voting Simulation: Candidates in file header: A, B, C - "STAR Voting Method"
# followed by line items - voter ballots - e.g.:
# - first line 0,2,5 (zero star vote for A, 2 two stars for B and 5 - max for C)
# - third line 5,4,3 (five stars for A, 4 for B and 3 for C
import sequtils, strformat, strutils, algorithm
var castedVotes = """
A,B,C
0,2,5
0,2,5
5,4,3
5,4,3
5,4,3"""
var aTotal, bTotal, cTotal: seq[int]
var Candidates_temp: string
var Candidates: seq[string]
var seq1 = castedVotes.split
var seq2 = seq1[0].split(",")
for e in seq2:
Candidates.add(e)
#[Step 1: Find the two candidates with the highest point totals - expected results - example:
Candidate Score Average Score
A 15 3
B 16 3.2
C 19 3.8]#
# I am trying to add totals for Candidate A, B and C
for line in seq1[1..seq1.high]:
var seq3 = line.split ","
for e in seq3:
aTotal.add(e[0])
bTotal.add(e[1])
cTotal.add(e[2])
q1: Am I using correct data structures? Theoretically Array is more suitable - since all the rows contain voter ballots with small integers only (from min 0 to max 5).
q2: Program fails with Error: type mismatch: got <seq[int], char> - what am I doing wrong? I am trying to add the number of STAR for each Candidate (simple addition). A = 15 , B = 16 , C = 19.
Step 2: Compare the head-to-head matchups between the two finalists: B and C.
# of voters who prefer A B C
A over - 3 3
B over 2 - 3
C over 2 2 -
In the example above - candidate B wins.
q3: How to approach calculations in step 2?
Two candidates with the Highest Score have been identified (B and C) and the 2 step is some kind of Instant Run-off.
What step 2 is doing (is it some kind of pairwise comparison?): https://en.wikipedia.org/wiki/STAR_voting
While not exactly the kind of solution you strictly want, I wanted to try to implement this in a dataframe kind of way, using the ggplotnim dataframe. Two small annoying things, but works well otherwise.
The solution should give you some idea on how to implement the final step of the computation. I can give more concrete input to your example tomorrow.
import std / os
import ggplotnim
var castedVotes = """
A,B,C
0,2,5
0,2,5
5,4,3
5,4,3
5,4,3
"""
# need to write to a file atm, since there's no easy way to parse string into DF
writeFile("data.csv", castedVotes)
# keep the raw data in its own DF (need it later)
var dfRaw = readCsvTyped("data.csv")
# turn wide form DF into long form (two columns: candidate and votes)
var df = dfRaw.gather(dfRaw.getKeys, key = "candidate", value = "votes")
echo df
# extract the winners, by grouping by candidate and then computing the sum of votes
# for each group. Sort in descending order and get the two from the top
let dfSum = df.group_by("candidate").summarize(f{int: "sum(votes)" << sum(`votes`)})
.arrange("sum(votes)", order = SortOrder.Descending)
.head(2)
echo dfSum
# get the two leading candidates
let c0 = dfSum["candidate", string, 0]
let c1 = dfSum["candidate", string, 1]
# and filter the long form DF to one that only contains leading candidates
df = df.filter(f{string: `candidate` == c1 or `candidate` == c1})
# allows us to compute the difference of the ballots of the for the two leading
# candidates.
dfRaw["Diff"] = dfRaw[c0, int] -. dfRaw[c1, int]
# dfRaw = dfRaw.mutate(f{int: df[c0][idx] - df[c1][idx])}) # this is broken atm :(
echo dfRaw # contains "Diff" column now
# Finally just count the number of rows in the DF where the "Diff" column is either
# > 0 or < 0. Just have to make sure we don't count ballots with same vote count ⇒ `Diff == 0`.
let c0More = dfRaw.filter(f{`Diff` > 0}).len
let c1More = dfRaw.filter(f{`Diff` < 0}).len
let winner = if c0More > c1More: c0 else: c1
echo "The winner is ", winner
Note that this solution isn't all that fast, because it loops over the data more times than really necessary for a "hand written solution". And hopefully I actually understood the system with my short glance, haha.
Here's my stab at the problem. Hopefully, well-enough commented and not too buggy.
It's way too imperative to my taste, but I didn't find enough functional primitives to elegantly process data and have enough escape hatches to validate it at the same time. I also don't know exact specs of how the program should output its results, so the code is a bit more generic, to accommodate different possible requirements. Of course, it's probably more contrived than needs to be, but it takes considerably more time to clean it up.
You initial code won't work, unfortunately. For starters, you really need to fully understand how types work, what you need to do to maintain the data properly typed and how you can use it to your advantage (and I only say it bluntly because you had already shown some pieces of code where you had some issues with types, hope you don't mind).
# "STAR Voting Method"
import sequtils, strutils, options
const
TAB = char(9)
castedVotes = """
A,B,C
0,2,5
0,2,5
5,4,3
5,4,3
5,4,3"""
type
Candidate = object
name: string
votes: Natural
total: Natural
VoteData = object
candidates: seq[Candidate]
validBallots: seq[seq[Natural]]
proc parseVotes(s: string): VoteData =
var
line = 1 # human-indexed
columns = 0
for l in s.split('\n'):
block mainLoop:
if line == 1: # separate the first line from the string
for column in l.split(','): # convert a line to a sequence, uses the `split` iterator
result.candidates.add(Candidate(name: strip(column), votes: 0, total: 0))
columns = result.candidates.len
else: # process the rest of the lines
let votesS = l.split(',') # this uses the `split` proc due to overloading
var ballot = newSeqOfCap[Natural](columns) # collect vote integers into a seq
for (i, vs) in votesS.pairs(): # iterate votes and tracks the index
if i >= columns:
stdErr.write("Excess votes in line ", $line, ", skipping\n")
break mainLoop # votes > columns, skip the line
let n = try: parseInt(strip(vs)) # convert a string representation into an integer
except:
stdErr.write("Can't parse votes in line ", $line, ", skipping\n")
break mainLoop # can't parse into a number, skip the line
ballot.add(n) # collect a seq of parsed votes
if ballot.len == columns and ballot.allIt(it in {0..5}): # additional verifiers
for i in 0..<columns:
result.candidates[i].votes.inc(1)
result.candidates[i].total.inc(ballot[i])
result.validBallots.add(ballot)
line.inc()
func finalIds(averages: openArray[float]): (Natural, Natural) =
## Returns indexes of two biggest floats
var scoreA, scoreB = 0.0
for (i, score) in averages.pairs():
if score > scoreA: # new score > the biggest so far
if result[0] > result[1]: # move the biggest to the second place
result[1] = result[0]
scoreB = scoreA
result[0] = i # replace the top
scoreA = score
elif score > scoreB: # new score < the top one, but > the runner-up
result[1] = i
scoreB = score
func runnoff(ballots: openArray[seq[Natural]]; idA, idB: Natural): Option[Natural] =
## This uses two versions of basically the same type
## of comparison for demonstration purposes
var prefsA, prefsB = 0
for ballot in ballots: # basic conditionals
if ballot[idA] > ballot[idB]:
prefsA.inc()
elif ballot[idA] < ballot[idB]:
prefsB.inc()
else: continue
result = case cmp(prefsA, prefsB): # `case` on results of `cmp`
of 1..high(int): some(idA)
of low(int) .. -1: some(idB)
else: none(Natural)
func averages(candidates: openArray[Candidate]): seq[float] =
candidates.mapIt(toFloat(it.total) / toFloat(it.votes))
when isMainModule:
let
voteData = parseVotes(castedVotes)
averageScores = averages(voteData.candidates)
echo("Candidate Score Average Score")
for (i, c) in voteData.candidates.pairs():
# intersperses an arry of strings with the stringified TAB
echo(join([c.name, $c.total, $averageScores[i]], $TAB))
let
finalists = finalIds(averageScores)
winnerId = runnoff(voteData.validBallots, finalists[0], finalists[1])
if winnerId.isSome():
echo("The winner is ", voteData.candidates[winnerId.get()].name)
else:
echo("The vote is a tie between candidates ", voteData.candidates[finalists[0]].name, " and ",voteData.candidates[finalists[1]].name)
hi Guys, Wow - I really appreciate your help!
Would you give me permission to move your code to GitHub: https://github.com/masiarek/STAR_Voting
This is a non-profit organization (STAR Voting) - so big THANK YOU!
Test Case 'Use01' - passed
Test Case 'Use02' - failed https://docs.google.com/spreadsheets/d/1y7m_cGnnpZJVvmwzENNp5e3fMuYdQzshVxbphay9PNU/edit?usp=sharing Message as-is: "The vote is a tie between candidates C and B". Message should-be: "The vote is a tie between candidates C and B. However, C has more points - hence C wins.".
I need to create text files with test cases in a directory on GitHub...
Would you give me permission to move your code to GitHub: https://github.com/masiarek/STAR_Voting > or, even better (post your code directly to GitHub) - I am still learning GitHub...
To be frank, I wrote my piece of code strictly as an educational vehicle and I don't really see the point in putting it anywhere else. If you want to use/redistribute it this snippet, consider it licensed under GNU General Public License v2.0 or later. I may upload it a bit later myself.
Test Case 'Use02' - failed https://docs.google.com/spreadsheets/d/1y7m_cGnnpZJVvmwzENNp5e3fMuYdQzshVxbphay9PNU/edit?usp=sharing
The code in my example already has all the result necessary to determine the winner, it's just a matter of arranging it according to the specs, which I didn't fully grasp when I wrote it. Let's leave it as an exercise for the reader. (What decides the winner in a tie? At which stage of the execution this result appear? How can we refactor the code to use this data to determine the winner unambiguously? Do we need to keep all the data in the structures we use to determine the winner? What code can we safely remove?)
Why not use a CSV parser? https://nim-lang.org/docs/parsecsv.html
I didn't feel this is a case of problem solving, more of a laying out the reasoning in code. My example is more on the verbose and self-sufficient side, compared to the actual code you'd write for a real-world usage.
hi @Zoom, I really appreciate your feedback. Yes, I want to learn programming - so any feedback is very much appreciated. I am looking for a mentor to guide me: what to learn in what sequence, do's and dont's, etc. It sounds my first assignment is to learn using new types in my small educational programs :-)
I am not sure how to structure the testing for a project like this. Any thoughts, ideas, suggestions? I updated High Level Process Overview in doc Programming Specification: https://docs.google.com/document/d/1x9lwhHUYAGa1JVrKuqQ1WKHYoXUEtvySH8KC3YY1ARY/edit#
I guess module Unittest must be used - but how to Assert that certain files (specific contest files) passed all validations (both positive and negative).
https://github.com/masiarek/STAR_Voting - this project is using MIT license (hopefully the most permissive open source type).
This is project is way beyond my programming skills - but with help and guidance from others and my hard work - I am confident that some positive results are possible...
I reimplemented the logic to something I could follow the best I could, and added a line to the data just for testing, and also added a test procedure to showcase "how" one may test this logic. By including C, B, C we've now added atleast some data to test our code, this can be expanded to include the expected winning number, averages or anything else, and only happen in your test suite so the underlying code never has to deal with it.
import std/[algorithm, math, parseutils, strformat, strutils]
const castedVotes = """
C,B,C
A,B,C
0,2,5
0,2,5
5,4,3
5,4,3
5,4,3"""
type
Candidate = object
votes: seq[int]
name: string
VoterData = object
candidates: seq[Candidate]
proc median(a: seq[int]): float =
## Median value probably is a dumb way to do this but *works*
let sorted = a.sorted
if a.len mod 2 == 0:
(sorted[a.len div 2] + sorted[a.len div 2 + 1]) / 2
else:
sorted[a.len div 2 + 1].float
proc calculateRunoffs(data: VoterData): (Candidate, Candidate) =
## This sums each candidate and returns the winners
var sumA, sumB = 0
for i, x in data.candidates:
let sum = x.votes.sum
if sum > sumA:
if sumA > sumB:
result[1] = result[0]
sumB = sumA
result[0] = data.candidates[i]
sumA = sum
elif sum > sumB:
result[1] = data.candidates[i]
sumB = sum
proc doesAWin(a, b: seq[int]): bool =
## Terribly named procedure for simplicity sake
a.median > b.median
proc parseVoterData(data: string): VoterData=
## Iterates though the data extracting names and voters
var
i = 0
buffer = ""
foundAllNames = false
candidate = 0
while i < data.len:
if not foundAllNames:
let strLen = data.parseUntil(buffer, {',', '\n'}, i)
result.candidates.add Candidate(name: buffer.strip)
foundAllNames = data[i + strLen] == '\n'
i += strLen + 1
else:
var score = 0
i += data.parseInt(score, i) + 1
result.candidates[candidate].votes.add score
candidate = (candidate + 1 + result.candidates.len) mod result.candidates.len
proc testParse(data: string) =
## This is for testing data sets it requires baking the actual wins
## in the first line `C,B,C` for the provided data.
# The following code is for our header of 3 characters comma seperated
var
firstCand, secondCand, projCand = ""
i = data.parseUntil(firstCand, {',', '\n'}) + 1
i += data.parseUntil(secondCand, {',', '\n'}, i) + 1
i += data.parseUntil(projCand, {',', '\n'}, i) + 1
let
data = data[i..^1].parseVoterData
(first, second) = data.calculateRunoffs
winningCand =
if doesAWin(first.votes, second.votes):
first
else:
second
assert first.name == firstCand.strip
assert second.name == secondCand.strip
assert winningCand.name == projCand.strip
echo fmt"Those that one the first pass are {first.name}, {second.name}"
if doesAWin(first.votes, second.votes):
echo fmt"{first.name} wins with: {first.votes.median: 2}"
else:
echo fmt"{second.name} wins with: {first.votes.median: 2}"
echo data
testParse(castedVotes)
Here's the zero_functional version. Interprets the run-off criterion like the OP does.
# copyright (c) 2021 Gerd Mathar
# License: Apache License 2.0 (https://spdx.org/licenses/Apache-2.0.html)
# SPDX-License-Identifier: Apache-2.0
import strformat, strutils, zero_functional, tables, algorithm, sugar
const castedVotes = """
A,B,C
0,2,5
0,2,5
5,4,3
5,4,3
5,4,3"""
# the data to accumulate:
type Record = tuple[total: int, over: CountTableRef[string]]
# create iterators instead of seqs to hopefully save memory.
castedVotes.splitLines --> map(it.split(',')).createIter(input)
input() --> drop(1).map(it --> map(it.parseInt)).createIter(ballots)
let names = input() --> take(1).flatten()
var
noBallots = 0
records: seq[Record]
= names --> map((total: 0, over: newCountTable[string]()))
# mappings like `map(ballot = it)` just capture transient loop vars like `it`
# and `idx` for reuse in inner loops.
records = ballots() --> foreach(noBallots.inc).map(ballot = it).fold(records,
a --> map(record = it).map(score = ballot[idx]).map((
total: it.total + score, # add ballot score to total
over: block: # count ballots with score lead pair-wise
ballot --> filter(score > it).foreach(record.over.inc(names[idx]))
record.over
))
)
var entries = names --> zip(records)
# unfortunately, `zip` creates tuple fields with the corresponding lists' names,
# hence the plural "s" name postfix for the `names` and `records` fields below:
# sort by total score.
entries.sort((a, b) => cmp(a.records.total, b.records.total), SortOrder.Descending)
echo "Candidate Score Average Score Ballots with lead over"
entries --> map(rec = it).foreach(
echo fmt"{rec[0]:<9} {rec[1][0]:>5} {rec[1][0]/noBallots:>13} {rec[1][1]}"
)
echo ""
# compare by number of ballots with pair-wise score lead.
let prefCmp = cmp(
entries[0].records.over[entries[1].names],
entries[1].records.over[entries[0].names]
)
echo case prefCmp
of -1: fmt"{entries[1].names} wins"
of 0: fmt"draw between {entries[0].names} and {entries[1].names}"
of 1: fmt"{entries[0].names} wins"
else: "an error occurred"