daltoniam/Skeets

Different image urls give same hash

sladjannimcevic opened this issue · 2 comments

private func hash(u: String) -> String {
    var url = u
    let len = count(url)-1
    if url[advance(url.startIndex,len)] == "/" {
        url = url[url.startIndex..<advance(url.startIndex,len)]
    }
    let size: Int = count(url)
    var hash: Int64 = Int64(size / 2)
    for codeUnit in url.utf8 {
        hash = hash + (Int(codeUnit) * 101)
    }
    return "\(hash)"
}

returns same hash for different urls.

https://allappwestus.blob.core.windows.net/images/caef4757-b825-4daa-bdba-283d90144404.jpg
https://allappwestus.blob.core.windows.net/images/5cbd969d-5432-4140-b1af-6ae0d38b05bf.jpg

Indeed it does. It is possible for any hashing algorithm to have collisions, but I think the hashing algorithm could be improved. I say we try changing the algorithm to this:

private func hash(u: String) -> String {
    var url = u
    let len = count(url)-1
    if url[advance(url.startIndex,len)] == "/" {
        url = url[url.startIndex..<advance(url.startIndex,len)]
    }
    var hash: UInt32 = 0
    for codeUnit in url.utf8 {
        hash += UInt32(codeUnit)
        hash ^= (hash >> 6)
    }

    hash += (hash << 3)
    hash ^= (hash >> 11)
    hash += (hash << 15)

    return "\(hash)"
}

Appears to have a better distribution.

Great! Thank you for great work