From 1715143b85175993625acac274eda2cd6cb50b46 Mon Sep 17 00:00:00 2001 From: Benoit Marty Date: Thu, 8 Apr 2021 16:18:58 +0200 Subject: [PATCH 1/4] Filter some other words Cannot filter canonical alias anymore, as we now use word boundaries --- .../roomdirectory/ExplicitTermFilterTest.kt | 118 ++++++++++++++++++ vector/src/main/assets/forbidden_terms.txt | 71 +++++++++++ .../roomdirectory/ExplicitTermFilter.kt | 39 ++++++ .../roomdirectory/RoomDirectoryViewModel.kt | 15 +-- 4 files changed, 231 insertions(+), 12 deletions(-) create mode 100644 vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt create mode 100644 vector/src/main/assets/forbidden_terms.txt create mode 100644 vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt diff --git a/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt b/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt new file mode 100644 index 0000000000..a5d8108ae9 --- /dev/null +++ b/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2021 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package im.vector.app.features.roomdirectory + +import im.vector.app.InstrumentedTest +import im.vector.app.core.utils.AssetReader +import org.amshove.kluent.shouldBe +import org.junit.FixMethodOrder +import org.junit.Test +import org.junit.runner.RunWith +import org.junit.runners.JUnit4 +import org.junit.runners.MethodSorters + +@RunWith(JUnit4::class) +@FixMethodOrder(MethodSorters.JVM) +class ExplicitTermFilterTest : InstrumentedTest { + + private val explicitTermFilter = ExplicitTermFilter(AssetReader(context())) + + @Test + fun isValidEmptyTrue() { + explicitTermFilter.isValid("") shouldBe true + } + + @Test + fun isValidTrue() { + explicitTermFilter.isValid("Hello") shouldBe true + } + + @Test + fun isValidFalse() { + explicitTermFilter.isValid("nsfw") shouldBe false + } + + @Test + fun isValidUpCaseFalse() { + explicitTermFilter.isValid("Nsfw") shouldBe false + } + + @Test + fun isValidMultilineTrue() { + explicitTermFilter.isValid("Hello\nWorld") shouldBe true + } + + @Test + fun isValidMultilineFalse() { + explicitTermFilter.isValid("Hello\nnsfw") shouldBe false + } + + @Test + fun isValidMultilineFalse2() { + explicitTermFilter.isValid("nsfw\nHello") shouldBe false + } + + @Test + fun isValidAnalFalse() { + explicitTermFilter.isValid("anal") shouldBe false + } + + @Test + fun isValidAnal2False() { + explicitTermFilter.isValid("There is some anal in this room") shouldBe false + } + + @Test + fun isValidAnalysisTrue() { + explicitTermFilter.isValid("analysis") shouldBe true + } + + @Test + fun isValidAnalysis2True() { + explicitTermFilter.isValid("There is some analysis in the room") shouldBe true + } + + @Test + fun isValidSpecialCharFalse() { + explicitTermFilter.isValid("18+") shouldBe false + } + + @Test + fun isValidSpecialChar2False() { + explicitTermFilter.isValid("This is a room with 18+ content") shouldBe false + } + + @Test + fun isValidOtherSpecialCharFalse() { + explicitTermFilter.isValid("strap-on") shouldBe false + } + + @Test + fun isValidOtherSpecialChar2False() { + explicitTermFilter.isValid("This is a room with strap-on content") shouldBe false + } + + @Test + fun isValid18True() { + explicitTermFilter.isValid("18") shouldBe true + } + + @Test + fun isValidLastFalse() { + explicitTermFilter.isValid("zoo") shouldBe false + } +} \ No newline at end of file diff --git a/vector/src/main/assets/forbidden_terms.txt b/vector/src/main/assets/forbidden_terms.txt new file mode 100644 index 0000000000..693da9c520 --- /dev/null +++ b/vector/src/main/assets/forbidden_terms.txt @@ -0,0 +1,71 @@ +anal +bbc +bbw +bdsm +beast +bestiality +blowjob +bondage +boobs +clit +cock +cuck +cum +cunt +daddy +dick +dildo +erotic +exhibitionism +faggot +feet +femboy +fisting +flogging +fmf +foursome +futa +gangbang +gore +h3ntai +handjob +hentai +incest +jizz +kink +loli +m4f +masturbation +mfm +mfm +milf +moresome +naked +neet +nipple +nsfw +nude +nudity +orgy +pedo +pegging +penis +petplay +porn +pussy +rape +rimming +sadism +sadomasochism +sexy +shota +spank +squirt +strap-on +threesome +vagina +vibrator +voyeur +watersports +xxx +zoo \ No newline at end of file diff --git a/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt b/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt new file mode 100644 index 0000000000..addc04ed96 --- /dev/null +++ b/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package im.vector.app.features.roomdirectory + +import im.vector.app.core.utils.AssetReader +import javax.inject.Inject + +class ExplicitTermFilter @Inject constructor( + assetReader: AssetReader +) { + // List of forbidden terms is in file asset forbidden_terms.txt, in lower case + private val explicitContentRegex = assetReader.readAssetFile("forbidden_terms.txt") + .orEmpty() + .split("\n") + .map { it.trim() } + .filter { it.isNotEmpty() } + .joinToString(prefix = ".*\\b(", separator = "|", postfix = ")\\b.*") + .toRegex(RegexOption.IGNORE_CASE) + + fun isValid(str: String): Boolean { + return explicitContentRegex.matches(str.replace("\n", " ")).not() + // Special treatment for "18+" since word boundaries does not work here + && str.contains("18+").not() + } +} diff --git a/vector/src/main/java/im/vector/app/features/roomdirectory/RoomDirectoryViewModel.kt b/vector/src/main/java/im/vector/app/features/roomdirectory/RoomDirectoryViewModel.kt index 4ef38758c7..9932fdb551 100644 --- a/vector/src/main/java/im/vector/app/features/roomdirectory/RoomDirectoryViewModel.kt +++ b/vector/src/main/java/im/vector/app/features/roomdirectory/RoomDirectoryViewModel.kt @@ -42,12 +42,12 @@ import org.matrix.android.sdk.api.session.room.model.thirdparty.RoomDirectoryDat import org.matrix.android.sdk.api.session.room.roomSummaryQueryParams import org.matrix.android.sdk.rx.rx import timber.log.Timber -import java.util.Locale class RoomDirectoryViewModel @AssistedInject constructor( @Assisted initialState: PublicRoomsViewState, vectorPreferences: VectorPreferences, - private val session: Session + private val session: Session, + private val explicitTermFilter: ExplicitTermFilter ) : VectorViewModel(initialState) { @AssistedFactory @@ -58,11 +58,6 @@ class RoomDirectoryViewModel @AssistedInject constructor( companion object : MvRxViewModelFactory { private const val PUBLIC_ROOMS_LIMIT = 20 - // List of forbidden terms, in lower case - private val explicitContentTerms = listOf( - "nsfw" - ) - @JvmStatic override fun create(viewModelContext: ViewModelContext, state: PublicRoomsViewState): RoomDirectoryViewModel? { val activity: RoomDirectoryActivity = (viewModelContext as ActivityViewModelContext).activity() @@ -202,11 +197,7 @@ class RoomDirectoryViewModel @AssistedInject constructor( // Filter val newPublicRooms = data.chunk.orEmpty() .filter { - showAllRooms - || "${it.name.orEmpty()} ${it.topic.orEmpty()} ${it.canonicalAlias.orEmpty()}".toLowerCase(Locale.ROOT) - .let { str -> - explicitContentTerms.all { term -> term !in str } - } + showAllRooms || explicitTermFilter.isValid("${it.name.orEmpty()} ${it.topic.orEmpty()}") } setState { From 153d393bf1a9a33316bd7fd7a22da653f6279a87 Mon Sep 17 00:00:00 2001 From: Benoit Marty Date: Thu, 8 Apr 2021 17:28:47 +0200 Subject: [PATCH 2/4] Prevent searching for forbidden terms --- .../features/roomdirectory/ExplicitTermFilterTest.kt | 10 ++++++++++ .../app/features/roomdirectory/ExplicitTermFilter.kt | 8 +++++++- .../features/roomdirectory/RoomDirectoryViewModel.kt | 11 +++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt b/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt index a5d8108ae9..7c66ad7462 100644 --- a/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt +++ b/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt @@ -115,4 +115,14 @@ class ExplicitTermFilterTest : InstrumentedTest { fun isValidLastFalse() { explicitTermFilter.isValid("zoo") shouldBe false } + + @Test + fun canSearchForFalse() { + explicitTermFilter.canSearchFor("zoo") shouldBe false + } + + @Test + fun canSearchForTrue() { + explicitTermFilter.canSearchFor("android") shouldBe true + } } \ No newline at end of file diff --git a/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt b/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt index addc04ed96..8abccbbe5e 100644 --- a/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt +++ b/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt @@ -23,14 +23,20 @@ class ExplicitTermFilter @Inject constructor( assetReader: AssetReader ) { // List of forbidden terms is in file asset forbidden_terms.txt, in lower case - private val explicitContentRegex = assetReader.readAssetFile("forbidden_terms.txt") + private val explicitTerms = assetReader.readAssetFile("forbidden_terms.txt") .orEmpty() .split("\n") .map { it.trim() } .filter { it.isNotEmpty() } + + private val explicitContentRegex = explicitTerms .joinToString(prefix = ".*\\b(", separator = "|", postfix = ")\\b.*") .toRegex(RegexOption.IGNORE_CASE) + fun canSearchFor(term: String): Boolean { + return term !in explicitTerms && term != "18+" + } + fun isValid(str: String): Boolean { return explicitContentRegex.matches(str.replace("\n", " ")).not() // Special treatment for "18+" since word boundaries does not work here diff --git a/vector/src/main/java/im/vector/app/features/roomdirectory/RoomDirectoryViewModel.kt b/vector/src/main/java/im/vector/app/features/roomdirectory/RoomDirectoryViewModel.kt index 9932fdb551..a6c4646f8c 100644 --- a/vector/src/main/java/im/vector/app/features/roomdirectory/RoomDirectoryViewModel.kt +++ b/vector/src/main/java/im/vector/app/features/roomdirectory/RoomDirectoryViewModel.kt @@ -161,6 +161,17 @@ class RoomDirectoryViewModel @AssistedInject constructor( } private fun load(filter: String, roomDirectoryData: RoomDirectoryData) { + if (!showAllRooms && !explicitTermFilter.canSearchFor(filter)) { + setState { + copy( + asyncPublicRoomsRequest = Success(Unit), + publicRooms = emptyList(), + hasMore = false + ) + } + return + } + currentJob = viewModelScope.launch { val data = try { session.getPublicRooms(roomDirectoryData.homeServer, From 1233fde2617ceaf40dc52e79514d2c0660e6c83f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 8 Apr 2021 18:26:55 +0100 Subject: [PATCH 3/4] Update forbidden_terms.txt --- vector/src/main/assets/forbidden_terms.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vector/src/main/assets/forbidden_terms.txt b/vector/src/main/assets/forbidden_terms.txt index 693da9c520..84e7fe1d28 100644 --- a/vector/src/main/assets/forbidden_terms.txt +++ b/vector/src/main/assets/forbidden_terms.txt @@ -1,5 +1,4 @@ anal -bbc bbw bdsm beast @@ -18,7 +17,6 @@ dildo erotic exhibitionism faggot -feet femboy fisting flogging @@ -35,14 +33,13 @@ jizz kink loli m4f +masturbate masturbation mfm -mfm milf moresome naked neet -nipple nsfw nude nudity @@ -68,4 +65,4 @@ vibrator voyeur watersports xxx -zoo \ No newline at end of file +zoo From f1e280827de7043d3c1a5a77d001bbf0e545b9f0 Mon Sep 17 00:00:00 2001 From: Benoit Marty Date: Thu, 8 Apr 2021 19:39:49 +0200 Subject: [PATCH 4/4] Ensure there is no dup and cleanup --- .../vector/app/features/roomdirectory/ExplicitTermFilterTest.kt | 2 +- .../im/vector/app/features/roomdirectory/ExplicitTermFilter.kt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt b/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt index 7c66ad7462..b2beec5b66 100644 --- a/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt +++ b/vector/src/androidTest/java/im/vector/app/features/roomdirectory/ExplicitTermFilterTest.kt @@ -125,4 +125,4 @@ class ExplicitTermFilterTest : InstrumentedTest { fun canSearchForTrue() { explicitTermFilter.canSearchFor("android") shouldBe true } -} \ No newline at end of file +} diff --git a/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt b/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt index 8abccbbe5e..0d1f55485c 100644 --- a/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt +++ b/vector/src/main/java/im/vector/app/features/roomdirectory/ExplicitTermFilter.kt @@ -27,6 +27,7 @@ class ExplicitTermFilter @Inject constructor( .orEmpty() .split("\n") .map { it.trim() } + .distinct() .filter { it.isNotEmpty() } private val explicitContentRegex = explicitTerms