Skip to content

Instantly share code, notes, and snippets.

@Obolrom
Last active December 16, 2023 16:03
Show Gist options
  • Save Obolrom/d6f367edc709a7dc5588d5839860f3dd to your computer and use it in GitHub Desktop.
Save Obolrom/d6f367edc709a7dc5588d5839860f3dd to your computer and use it in GitHub Desktop.
import java.util.Objects
/*
 * Code sample: detecting duplicated objects by a given set of criteria.
 *
 * The task: find the employees that duplicate each other according to the uniqueness criteria below.
 *
 * The criteria:
 *  - firstName
 *  - lastName
 *  - at least one common company id (company ids are assumed to be unique)
 */

/**
 * Employee record used by the duplication sample.
 *
 * @property id identifier of this record
 * @property firstName first name; one of the duplication criteria
 * @property lastName last name; one of the duplication criteria
 * @property companies companies the employee is attached to; their ids feed the duplication criteria
 * @property phoneNumber phone number; not listed among the duplication criteria
 */
data class Employee(
    val id: Long,
    val firstName: String,
    val lastName: String,
    val companies: List<Company>,
    val phoneNumber: String,
) {
    /** Renders every field as `{key=value, ...}`, wrapping the string fields in single quotes. */
    override fun toString(): String =
        listOf(
            "id=$id",
            "firstName='$firstName'",
            "lastName='$lastName'",
            "companies=$companies",
            "phoneNumber='$phoneNumber'",
        ).joinToString(separator = ", ", prefix = "{", postfix = "}")
}
/**
 * Company an employee can be attached to.
 *
 * @property id identifier of the company
 * @property name display name of the company
 */
data class Company(
    val id: Long,
    val name: String,
) {
    /** Renders the company as `{id=<id>, name='<name>'}`. */
    override fun toString(): String = buildString {
        append("{id=").append(id)
        append(", name='").append(name).append("'}")
    }
}
/**
 * Sample duplication criteria; the exact rule depends on your business rules.
 *
 * Two criteria are equal when the first name and the last name match and the two
 * [companyIds] sets share at least one id.
 *
 * Note that this equality is pairwise only and therefore not transitive: `{1}` equals `{1, 2}`
 * and `{1, 2}` equals `{2}`, yet `{1}` does not equal `{2}`. Two distinct instances whose
 * [companyIds] are both empty are never equal.
 *
 * @property firstName first name to match exactly
 * @property lastName last name to match exactly
 * @property companyIds company ids of the employee; one shared id is enough for a match
 */
data class EmployeeDuplicationCriteria(
    val firstName: String,
    val lastName: String,
    val companyIds: Set<Long>,
) {
    override fun equals(other: Any?): Boolean {
        if (other === this) return true
        if (other !is EmployeeDuplicationCriteria) return false

        val sameName = firstName == other.firstName && lastName == other.lastName
        // at least one company id present on both sides -> duplication
        return sameName && companyIds.any { it in other.companyIds }
    }

    // companyIds is left out on purpose: instances that are equal through a shared company id
    // may hold different id sets, and they still have to produce the same hash code
    override fun hashCode(): Int = Objects.hash(firstName, lastName)
}
/**
 * Demo entry point: builds a small employee list and prints every group of duplicated employees.
 *
 * For each group one line of the form `duplicated employee ids: [<ids>], [<employees>]`
 * is written to standard output.
 */
fun main() {
    val companyA = Company(1, "CompanyA")
    val companyB = Company(2, "CompanyB")

    // duplicated employees: (1, 4), (3, 5)
    val employees = listOf(
        Employee(1, "John", "Patrick", listOf(companyA), "sampleNumber1"),
        Employee(2, "Jake", "Andretti", listOf(companyA), "sampleNumber2"),
        Employee(3, "Andrew", "Gray", listOf(companyB), "sampleNumber3"),
        Employee(4, "John", "Patrick", listOf(companyA, companyB), "sampleNumber4"),
        Employee(5, "Andrew", "Gray", listOf(companyB), "sampleNumber5"),
    )

    findDuplicateGroups(employees).forEach { duplicatedEmployees ->
        println("duplicated employee ids: ${duplicatedEmployees.map { it.id }}, $duplicatedEmployees")
    }
}

/**
 * Splits [employees] into groups of duplicates and returns only the groups with more than one member.
 *
 * Two employees are linked when their [EmployeeDuplicationCriteria] compare equal. That equality is
 * pairwise and not transitive (`{A}` matches `{A, B}`, `{A, B}` matches `{B}`, but `{A}` does not
 * match `{B}`), so using the criteria directly as a hash-map key makes the grouping depend on the
 * input order. Here the criteria serve only as a pairwise predicate, and linked employees are merged
 * with a union-find, so everyone connected through a chain of matches ends up in the same group.
 *
 * Groups are ordered by their first member, and members keep their input order.
 *
 * @param employees employees to inspect; may be empty
 * @return the duplicate groups, each holding at least two employees
 */
private fun findDuplicateGroups(employees: List<Employee>): List<List<Employee>> {
    val criteria = employees.map { employee ->
        EmployeeDuplicationCriteria(
            firstName = employee.firstName,
            lastName = employee.lastName,
            companyIds = employee.companies.mapTo(mutableSetOf()) { it.id },
        )
    }

    // union-find forest over list positions; parent[i] == i marks the root of a group
    val parent = IntArray(employees.size) { it }

    fun rootOf(index: Int): Int {
        var current = index
        while (parent[current] != current) {
            // shorten the chain while walking up, so later lookups stay cheap
            parent[current] = parent[parent[current]]
            current = parent[current]
        }
        return current
    }

    // equal criteria always share a hash code, so pairwise checks are only needed inside a hash bucket
    criteria.indices
        .groupBy { criteria[it].hashCode() }
        .values
        .forEach { bucket ->
            for (i in bucket.indices) {
                for (j in i + 1 until bucket.size) {
                    val left = rootOf(bucket[i])
                    val right = rootOf(bucket[j])
                    if (left != right && criteria[bucket[i]] == criteria[bucket[j]]) {
                        parent[right] = left
                    }
                }
            }
        }

    return employees.indices
        .groupBy { rootOf(it) }
        .values
        .filter { it.size > 1 }
        .map { group -> group.map { employees[it] } }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment