From 3cbd85640046444945cc3711d041294cb0a35f87 Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Wed, 10 Nov 2021 16:25:47 -0500 Subject: [PATCH 01/10] This actually works! I haven't committed the actual files under solrj/solr/testproducts/conf because I want to see if I can winnow them down a bit before adding them. --- .../solr/client/solrj/SolrExampleXMLTest.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java index 538255b664ab..15542963ed14 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java @@ -16,21 +16,35 @@ */ package org.apache.solr.client.solrj; +import org.apache.commons.io.FileUtils; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.client.solrj.request.RequestWriter; +import org.eclipse.jetty.servlet.ServletHolder; import org.junit.BeforeClass; +import java.io.File; +import java.util.SortedMap; +import java.util.TreeMap; + /** * A subclass of SolrExampleTests that explicitly uses the xml codec for * communication. */ @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") public class SolrExampleXMLTest extends SolrExampleTests { + @BeforeClass public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); + File tmpSolrHome = createTempDir().toFile(); + FileUtils.copyDirectory(new File(getFile("solrj/solr/testproducts/conf").getParent()), new File(tmpSolrHome.getAbsoluteFile(), "collection1")); + + FileUtils.copyFile(getFile("solrj/solr/solr.xml"), new File(tmpSolrHome.getAbsoluteFile(), "solr.xml")); + + final SortedMap extraServlets = new TreeMap<>(); + + createAndStartJetty(tmpSolrHome.getAbsolutePath(), "solrconfig.xml", "managed-schema", "/solr", true, extraServlets); } @Override From 316bb6c9677874933dc34e1d8d9cfc2be39e2f8c Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Wed, 10 Nov 2021 16:26:02 -0500 Subject: [PATCH 02/10] Revert "This actually works!" This reverts commit 3cbd85640046444945cc3711d041294cb0a35f87. --- .../solr/client/solrj/SolrExampleXMLTest.java | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java index 15542963ed14..538255b664ab 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java @@ -16,35 +16,21 @@ */ package org.apache.solr.client.solrj; -import org.apache.commons.io.FileUtils; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.client.solrj.request.RequestWriter; -import org.eclipse.jetty.servlet.ServletHolder; import org.junit.BeforeClass; -import java.io.File; -import java.util.SortedMap; -import java.util.TreeMap; - /** * A subclass of SolrExampleTests that explicitly uses the xml codec for * communication. */ @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") public class SolrExampleXMLTest extends SolrExampleTests { - @BeforeClass public static void beforeTest() throws Exception { - File tmpSolrHome = createTempDir().toFile(); - FileUtils.copyDirectory(new File(getFile("solrj/solr/testproducts/conf").getParent()), new File(tmpSolrHome.getAbsoluteFile(), "collection1")); - - FileUtils.copyFile(getFile("solrj/solr/solr.xml"), new File(tmpSolrHome.getAbsoluteFile(), "solr.xml")); - - final SortedMap extraServlets = new TreeMap<>(); - - createAndStartJetty(tmpSolrHome.getAbsolutePath(), "solrconfig.xml", "managed-schema", "/solr", true, extraServlets); + createAndStartJetty(legacyExampleCollection1SolrHome()); } @Override From 151049ed3a72e22a3b76deca10b8457ca125aa6b Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Wed, 10 Nov 2021 16:27:21 -0500 Subject: [PATCH 03/10] Revert "Revert "This actually works!"" This reverts commit 316bb6c9677874933dc34e1d8d9cfc2be39e2f8c. --- .../solr/client/solrj/SolrExampleXMLTest.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java index 538255b664ab..15542963ed14 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java @@ -16,21 +16,35 @@ */ package org.apache.solr.client.solrj; +import org.apache.commons.io.FileUtils; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.client.solrj.request.RequestWriter; +import org.eclipse.jetty.servlet.ServletHolder; import org.junit.BeforeClass; +import java.io.File; +import java.util.SortedMap; +import java.util.TreeMap; + /** * A subclass of SolrExampleTests that explicitly uses the xml codec for * communication. */ @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") public class SolrExampleXMLTest extends SolrExampleTests { + @BeforeClass public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); + File tmpSolrHome = createTempDir().toFile(); + FileUtils.copyDirectory(new File(getFile("solrj/solr/testproducts/conf").getParent()), new File(tmpSolrHome.getAbsoluteFile(), "collection1")); + + FileUtils.copyFile(getFile("solrj/solr/solr.xml"), new File(tmpSolrHome.getAbsoluteFile(), "solr.xml")); + + final SortedMap extraServlets = new TreeMap<>(); + + createAndStartJetty(tmpSolrHome.getAbsolutePath(), "solrconfig.xml", "managed-schema", "/solr", true, extraServlets); } @Override From df45daacc79a2edb57420e338cb644185827a389 Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Thu, 11 Nov 2021 07:38:50 -0500 Subject: [PATCH 04/10] Relocate the method looking up the "testproducts" config to SolrJettyTestBase legacyExampleCollection1SolrHome() now can pick between the old "techproducts", and the new only in solrj "testproducts" config, while I figure out if I can remove the use of legacyExampleCollection1SolrHome from the solr/core tests! --- .../solr/client/solrj/SolrExampleXMLTest.java | 9 +--- .../org/apache/solr/SolrJettyTestBase.java | 42 +++++++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java index 15542963ed14..73f376e052be 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java @@ -37,14 +37,7 @@ public class SolrExampleXMLTest extends SolrExampleTests { @BeforeClass public static void beforeTest() throws Exception { - File tmpSolrHome = createTempDir().toFile(); - FileUtils.copyDirectory(new File(getFile("solrj/solr/testproducts/conf").getParent()), new File(tmpSolrHome.getAbsoluteFile(), "collection1")); - - FileUtils.copyFile(getFile("solrj/solr/solr.xml"), new File(tmpSolrHome.getAbsoluteFile(), "solr.xml")); - - final SortedMap extraServlets = new TreeMap<>(); - - createAndStartJetty(tmpSolrHome.getAbsolutePath(), "solrconfig.xml", "managed-schema", "/solr", true, extraServlets); + createAndStartJetty(legacyExampleCollection1SolrHome()); } @Override diff --git a/solr/test-framework/src/java/org/apache/solr/SolrJettyTestBase.java b/solr/test-framework/src/java/org/apache/solr/SolrJettyTestBase.java index 750dd15f701e..538ee511212b 100644 --- a/solr/test-framework/src/java/org/apache/solr/SolrJettyTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/SolrJettyTestBase.java @@ -168,6 +168,48 @@ public static void cleanUpJettyHome(File solrHome) throws Exception { } public static String legacyExampleCollection1SolrHome() { + // Let us pick which location we are pointing to, temporary for now!!! + //return legacyTechProductsCollection1SolrHome(); + return testProductsCollection1SolrHome(); + } + + public static String testProductsCollection1SolrHome(){ + File tmpSolrHome = createTempDir().toFile(); + try { + FileUtils.copyFile(getFile("solrj/solr/solr.xml"), new File(tmpSolrHome.getAbsoluteFile(), "solr.xml")); + File collection1Dir = new File(tmpSolrHome, "collection1"); + FileUtils.copyDirectory(new File(getFile("solrj/solr/testproducts/conf").getParent()), collection1Dir); + + Properties props = new Properties(); + props.setProperty("name", "collection1"); + OutputStreamWriter writer = null; + try { + writer = new OutputStreamWriter(FileUtils.openOutputStream( + new File(collection1Dir, "core.properties")), StandardCharsets.UTF_8); + props.store(writer, null); + } finally { + if (writer != null) { + try { + writer.close(); + } catch (Exception ignore){} + } + } + + } catch (Exception exc) { + if (exc instanceof RuntimeException) { + throw (RuntimeException)exc; + } else { + throw new RuntimeException(exc); + } + } + + return tmpSolrHome.getAbsolutePath(); + } + + public static String legacyTechProductsCollection1SolrHome() { + + + String sourceHome = ExternalPaths.SOURCE_HOME; if (sourceHome == null) throw new IllegalStateException("No source home! Cannot create the legacy example solr home directory."); From 23b457bbad4ac8a7b160f7033c77c8a325aacdfa Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Thu, 11 Nov 2021 09:47:44 -0500 Subject: [PATCH 05/10] ShowFileRequestHandlerTest works with a standard test config, doesn't need any special "techproducts" configs --- .../solr/handler/admin/ShowFileRequestHandlerTest.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/handler/admin/ShowFileRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/ShowFileRequestHandlerTest.java index 0e78329f32ec..5f0f2fe2d402 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/ShowFileRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/ShowFileRequestHandlerTest.java @@ -33,6 +33,7 @@ import org.apache.solr.response.SolrQueryResponse; import org.junit.BeforeClass; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.Reader; @@ -46,8 +47,10 @@ public class ShowFileRequestHandlerTest extends SolrJettyTestBase { @BeforeClass public static void beforeTest() throws Exception { + File workDir = createTempDir().toFile(); + setupJettyTestHome(workDir, "collection1"); initCore("solrconfig.xml", "schema.xml"); - createAndStartJetty(legacyExampleCollection1SolrHome()); + createAndStartJetty(workDir.getAbsolutePath()); } public void test404ViaHttp() throws Exception { @@ -88,7 +91,7 @@ public void testDirList() throws SolrServerException, IOException { public void testGetRawFile() throws SolrServerException, IOException { SolrClient client = getSolrClient(); //assertQ(req("qt", "/admin/file")); TODO file bug that SolrJettyTestBase extends SolrTestCaseJ4 - QueryRequest request = new QueryRequest(params("file", "managed-schema")); + QueryRequest request = new QueryRequest(params("file", "schema.xml")); request.setPath("/admin/file"); final AtomicBoolean readFile = new AtomicBoolean(); request.setResponseParser(new ResponseParser() { From 537c8c808668debce1eb02c68fdac2877871662e Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Thu, 11 Nov 2021 14:02:06 -0500 Subject: [PATCH 06/10] JvmMetricsTest doesn't require a full blown techproducts configset to run use the standard collection1 --- .../src/test/org/apache/solr/metrics/JvmMetricsTest.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java index 7c007cb00d04..242f5997798a 100644 --- a/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java +++ b/solr/core/src/test/org/apache/solr/metrics/JvmMetricsTest.java @@ -16,6 +16,7 @@ */ package org.apache.solr.metrics; +import java.io.File; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; @@ -56,7 +57,10 @@ public class JvmMetricsTest extends SolrJettyTestBase { @BeforeClass public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); + File workDir = createTempDir().toFile(); + setupJettyTestHome(workDir, "collection1"); + initCore("solrconfig.xml", "schema.xml"); + createAndStartJetty(workDir.getAbsolutePath()); } @Test From 4e8e9772084724588af3d7f7565351d078af5a6d Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Thu, 11 Nov 2021 14:35:11 -0500 Subject: [PATCH 07/10] This test now passes for all the tests, including the testBootstrapWithChroot testBootstrapWithChroot was failing with the shorter setup, but seems to like the longer one. Not sure why. --- .../test/org/apache/solr/cloud/ZkCLITest.java | 58 ++++++++++++++++--- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java index 547124ec0937..70ea32005b18 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java @@ -16,22 +16,19 @@ */ package org.apache.solr.cloud; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.InputStream; -import java.io.PrintStream; +import java.io.*; import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.util.Collection; import java.util.List; +import java.util.Properties; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.io.filefilter.RegexFileFilter; import org.apache.commons.io.filefilter.TrueFileFilter; +import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.SolrJettyTestBase; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; @@ -75,6 +72,53 @@ public static void afterClass() throws InterruptedException { System.clearProperty("solrcloud.skip.autorecovery"); } + public static String setupCollection1() { + + // I don't know why I have to go through all this rigamorole, copied from when this should work: +// +// File workDir = createTempDir().toFile(); +// setupJettyTestHome(workDir, "collection1"); +// initCore("solrconfig.xml", "schema.xml"); +// createAndStartJetty(workDir.getAbsolutePath()); + + // However the test testBootstrapWithChroot only works when I do all the rigamorole... The others all pass with the + // above commented out code. + + + File tempSolrHome = LuceneTestCase.createTempDir().toFile(); + try { + + org.apache.commons.io.FileUtils.copyFileToDirectory(getFile("solr/solr.xml"), tempSolrHome); + File collection1Dir = new File(tempSolrHome, "collection1"); + org.apache.commons.io.FileUtils.forceMkdir(collection1Dir); + + File configSetDir = getFile("solr/collection1/conf"); + org.apache.commons.io.FileUtils.copyDirectoryToDirectory(configSetDir, collection1Dir); + Properties props = new Properties(); + props.setProperty("name", "collection1"); + OutputStreamWriter writer = null; + try { + writer = new OutputStreamWriter(FileUtils.openOutputStream( + new File(collection1Dir, "core.properties")), StandardCharsets.UTF_8); + props.store(writer, null); + } finally { + if (writer != null) { + try { + writer.close(); + } catch (Exception ignore){} + } + } + } catch (Exception exc) { + if (exc instanceof RuntimeException) { + throw (RuntimeException)exc; + } else { + throw new RuntimeException(exc); + } + } + + return tempSolrHome.getAbsolutePath(); + } + @Override public void setUp() throws Exception { super.setUp(); @@ -82,7 +126,7 @@ public void setUp() throws Exception { log.info("####SETUP_START {}", getTestName()); } - String exampleHome = SolrJettyTestBase.legacyExampleCollection1SolrHome(); + String exampleHome = setupCollection1(); Path tmpDir = createTempDir(); solrHome = exampleHome; From bd482b6d638f4444bb400049f5b75366085c0105 Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Sun, 21 Nov 2021 19:46:21 -0500 Subject: [PATCH 08/10] unused import --- solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java index 70ea32005b18..a61c7ed24647 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java @@ -29,7 +29,6 @@ import org.apache.commons.io.filefilter.RegexFileFilter; import org.apache.commons.io.filefilter.TrueFileFilter; import org.apache.lucene.util.LuceneTestCase; -import org.apache.solr.SolrJettyTestBase; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ClusterProperties; From d54fa9c8c371f1535b7b72be285314a2056afe97 Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Sun, 21 Nov 2021 21:15:38 -0500 Subject: [PATCH 09/10] Don't need to call createAndStartJetty for every subclass of SolrExampleTests Just set this up on SolrExampleTests --- .../solr/client/solrj/SolrExampleBinaryHttp2Test.java | 8 ++++---- .../apache/solr/client/solrj/SolrExampleBinaryTest.java | 8 ++++---- .../org/apache/solr/client/solrj/SolrExampleTests.java | 6 ++++++ .../client/solrj/embedded/SolrExampleEmbeddedTest.java | 8 ++++---- .../solr/client/solrj/embedded/SolrExampleJettyTest.java | 8 ++++---- .../solrj/embedded/SolrExampleStreamingHttp2Test.java | 8 ++++---- .../client/solrj/embedded/SolrExampleStreamingTest.java | 8 ++++---- .../client/solrj/embedded/SolrExampleXMLHttp2Test.java | 8 ++++---- 8 files changed, 34 insertions(+), 28 deletions(-) diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleBinaryHttp2Test.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleBinaryHttp2Test.java index 89923cba6d03..9f677bd58952 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleBinaryHttp2Test.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleBinaryHttp2Test.java @@ -30,10 +30,10 @@ @SolrTestCaseJ4.SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") public class SolrExampleBinaryHttp2Test extends SolrExampleTests { - @BeforeClass - public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); - } +// @BeforeClass +// public static void beforeTest() throws Exception { +// createAndStartJetty(testProductsCollection1SolrHome()); +// } @Override public SolrClient createNewSolrClient() diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleBinaryTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleBinaryTest.java index a4bd61ac3e76..7a44cbea935c 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleBinaryTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleBinaryTest.java @@ -29,10 +29,10 @@ */ @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") public class SolrExampleBinaryTest extends SolrExampleTests { - @BeforeClass - public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); - } +// @BeforeClass +// public static void beforeTest() throws Exception { +// createAndStartJetty(testProductsCollection1SolrHome()); +// } @Override public SolrClient createNewSolrClient() diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java index 1eadc36b6508..e03615734afc 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java @@ -76,6 +76,7 @@ import org.apache.solr.util.RTimer; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.noggit.JSONParser; import org.slf4j.Logger; @@ -102,6 +103,11 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase ignoreException("uniqueKey"); } + @BeforeClass + public static void beforeTest() throws Exception { + createAndStartJetty(testProductsCollection1SolrHome()); + } + @Before public void emptyCollection() throws Exception { SolrClient client = getSolrClient(); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleEmbeddedTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleEmbeddedTest.java index 05d871710a6a..89fa7f074b0a 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleEmbeddedTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleEmbeddedTest.java @@ -27,8 +27,8 @@ */ public class SolrExampleEmbeddedTest extends SolrExampleTests { - @BeforeClass - public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); - } +// @BeforeClass +// public static void beforeTest() throws Exception { +// createAndStartJetty(testProductsCollection1SolrHome()); +// } } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java index d48c65fcf525..df385f8f7bb9 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java @@ -57,10 +57,10 @@ @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") public class SolrExampleJettyTest extends SolrExampleTests { - @BeforeClass - public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); - } +// @BeforeClass +// public static void beforeTest() throws Exception { +// createAndStartJetty(testProductsCollection1SolrHome()); +// } @Test public void testBadSetup() { diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingHttp2Test.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingHttp2Test.java index 2dfbd675fad6..df6a2389229d 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingHttp2Test.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingHttp2Test.java @@ -34,10 +34,10 @@ public class SolrExampleStreamingHttp2Test extends SolrExampleTests { - @BeforeClass - public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); - } +// @BeforeClass +// public static void beforeTest() throws Exception { +// createAndStartJetty(testProductsCollection1SolrHome()); +// } @Override public SolrClient createNewSolrClient() diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java index a31d0772d7f2..dd1f00206f9c 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java @@ -39,10 +39,10 @@ @Slow public class SolrExampleStreamingTest extends SolrExampleTests { - @BeforeClass - public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); - } +// @BeforeClass +// public static void beforeTest() throws Exception { +// createAndStartJetty(testProductsCollection1SolrHome()); +// } @Override public SolrClient createNewSolrClient() diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleXMLHttp2Test.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleXMLHttp2Test.java index 8b169f4d6799..b90fb00b2095 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleXMLHttp2Test.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleXMLHttp2Test.java @@ -25,10 +25,10 @@ import org.junit.BeforeClass; public class SolrExampleXMLHttp2Test extends SolrExampleTests { - @BeforeClass - public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); - } +// @BeforeClass +// public static void beforeTest() throws Exception { +// createAndStartJetty(testProductsCollection1SolrHome()); +// } @Override public SolrClient createNewSolrClient() { From 9e7e86649274c25bae4070893582a6b0195a3e3a Mon Sep 17 00:00:00 2001 From: "epugh@opensourceconnections.com" <> Date: Sun, 21 Nov 2021 21:20:17 -0500 Subject: [PATCH 10/10] Make a copy of the old techproducts in the solr/solrj/test-files path called "testproducts". Next step is to slim down the new "testproducts" to just what the tests need to pass, letting us change the old techproducts to be better at demoing features. --- .../solr/testproducts/conf/_rest_managed.json | 1 + .../_schema_analysis_stopwords_english.json | 38 + .../_schema_analysis_synonyms_english.json | 11 + .../solrj/solr/testproducts/conf/currency.xml | 67 + .../solrj/solr/testproducts/conf/elevate.xml | 42 + .../conf/lang/contractions_ca.txt | 8 + .../conf/lang/contractions_fr.txt | 15 + .../conf/lang/contractions_ga.txt | 5 + .../conf/lang/contractions_it.txt | 23 + .../conf/lang/hyphenations_ga.txt | 5 + .../testproducts/conf/lang/stemdict_nl.txt | 6 + .../testproducts/conf/lang/stoptags_ja.txt | 420 ++ .../testproducts/conf/lang/stopwords_ar.txt | 125 + .../testproducts/conf/lang/stopwords_bg.txt | 193 + .../testproducts/conf/lang/stopwords_ca.txt | 220 + .../testproducts/conf/lang/stopwords_ckb.txt | 136 + .../testproducts/conf/lang/stopwords_cz.txt | 172 + .../testproducts/conf/lang/stopwords_da.txt | 110 + .../testproducts/conf/lang/stopwords_de.txt | 294 ++ .../testproducts/conf/lang/stopwords_el.txt | 78 + .../testproducts/conf/lang/stopwords_en.txt | 54 + .../testproducts/conf/lang/stopwords_es.txt | 356 ++ .../testproducts/conf/lang/stopwords_et.txt | 1603 +++++++ .../testproducts/conf/lang/stopwords_eu.txt | 99 + .../testproducts/conf/lang/stopwords_fa.txt | 313 ++ .../testproducts/conf/lang/stopwords_fi.txt | 97 + .../testproducts/conf/lang/stopwords_fr.txt | 186 + .../testproducts/conf/lang/stopwords_ga.txt | 110 + .../testproducts/conf/lang/stopwords_gl.txt | 161 + .../testproducts/conf/lang/stopwords_hi.txt | 235 + .../testproducts/conf/lang/stopwords_hu.txt | 211 + .../testproducts/conf/lang/stopwords_hy.txt | 46 + .../testproducts/conf/lang/stopwords_id.txt | 359 ++ .../testproducts/conf/lang/stopwords_it.txt | 303 ++ .../testproducts/conf/lang/stopwords_ja.txt | 127 + .../testproducts/conf/lang/stopwords_lv.txt | 172 + .../testproducts/conf/lang/stopwords_nl.txt | 119 + .../testproducts/conf/lang/stopwords_no.txt | 194 + .../testproducts/conf/lang/stopwords_pt.txt | 253 ++ .../testproducts/conf/lang/stopwords_ro.txt | 233 + .../testproducts/conf/lang/stopwords_ru.txt | 243 ++ .../testproducts/conf/lang/stopwords_sv.txt | 133 + .../testproducts/conf/lang/stopwords_th.txt | 119 + .../testproducts/conf/lang/stopwords_tr.txt | 212 + .../testproducts/conf/lang/userdict_ja.txt | 29 + .../solr/testproducts/conf/managed-schema | 1202 ++++++ .../testproducts/conf/mapping-FoldToASCII.txt | 3813 +++++++++++++++++ .../conf/mapping-ISOLatin1Accent.txt | 246 ++ .../solrj/solr/testproducts/conf/params.json | 11 + .../solr/testproducts/conf/protwords.txt | 21 + .../solr/testproducts/conf/solrconfig.xml | 1423 ++++++ .../solr/testproducts/conf/spellings.txt | 2 + .../solr/testproducts/conf/stopwords.txt | 14 + .../solrj/solr/testproducts/conf/synonyms.txt | 29 + .../solr/testproducts/conf/update-script.js | 53 + .../solr/testproducts/conf/xslt/example.xsl | 132 + .../testproducts/conf/xslt/example_atom.xsl | 67 + .../testproducts/conf/xslt/example_rss.xsl | 66 + .../solr/testproducts/conf/xslt/luke.xsl | 337 ++ .../solr/testproducts/conf/xslt/updateXml.xsl | 74 + .../solr/client/solrj/SolrExampleXMLTest.java | 14 +- .../solr/client/solrj/TestBatchUpdate.java | 2 +- .../client/solrj/TestSolrJErrorHandling.java | 2 +- .../solrj/embedded/JettyWebappTest.java | 2 +- .../embedded/LargeVolumeEmbeddedTest.java | 6 +- .../solrj/impl/BasicHttpSolrClientTest.java | 2 +- ...rentUpdateHttp2SolrClientBadInputTest.java | 2 +- .../ConcurrentUpdateHttp2SolrClientTest.java | 2 +- ...oncurrentUpdateSolrClientBadInputTest.java | 2 +- .../impl/ConcurrentUpdateSolrClientTest.java | 2 +- .../Http2SolrClientCompatibilityTest.java | 6 +- .../solrj/impl/Http2SolrClientTest.java | 2 +- .../impl/HttpSolrClientBadInputTest.java | 2 +- .../solrj/impl/HttpSolrClientConPoolTest.java | 4 +- .../impl/LBHttpSolrClientBadInputTest.java | 2 +- .../response/NoOpResponseParserTest.java | 2 +- .../solrj/response/TestSuggesterResponse.java | 2 +- .../common/cloud/TestZkConfigSetService.java | 2 +- .../solr/EmbeddedSolrServerTestBase.java | 5 +- .../org/apache/solr/SolrJettyTestBase.java | 90 +- 80 files changed, 15503 insertions(+), 76 deletions(-) create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/_rest_managed.json create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/_schema_analysis_stopwords_english.json create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/_schema_analysis_synonyms_english.json create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/currency.xml create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/elevate.xml create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_ca.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_fr.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_ga.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_it.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/hyphenations_ga.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stemdict_nl.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stoptags_ja.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ar.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_bg.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ca.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ckb.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_cz.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_da.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_de.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_el.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_en.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_es.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_et.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_eu.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fa.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fi.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fr.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ga.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_gl.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hi.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hu.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hy.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_id.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_it.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ja.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_lv.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_nl.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_no.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_pt.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ro.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ru.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_sv.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_th.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_tr.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/userdict_ja.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/managed-schema create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/mapping-FoldToASCII.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/mapping-ISOLatin1Accent.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/params.json create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/protwords.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/solrconfig.xml create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/spellings.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/stopwords.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/synonyms.txt create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/update-script.js create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example.xsl create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example_atom.xsl create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example_rss.xsl create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/luke.xsl create mode 100644 solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/updateXml.xsl diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_rest_managed.json b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_rest_managed.json new file mode 100644 index 000000000000..6a4aec39edcd --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_rest_managed.json @@ -0,0 +1 @@ +{"initArgs":{},"managedList":[]} diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_schema_analysis_stopwords_english.json b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_schema_analysis_stopwords_english.json new file mode 100644 index 000000000000..a694e5c376a0 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_schema_analysis_stopwords_english.json @@ -0,0 +1,38 @@ +{ + "initArgs":{"ignoreCase":true}, + "managedList":[ + "a", + "an", + "and", + "are", + "as", + "at", + "be", + "but", + "by", + "for", + "if", + "in", + "into", + "is", + "it", + "no", + "not", + "of", + "on", + "or", + "stopworda", + "stopwordb", + "such", + "that", + "the", + "their", + "then", + "there", + "these", + "they", + "this", + "to", + "was", + "will", + "with"]} diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_schema_analysis_synonyms_english.json b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_schema_analysis_synonyms_english.json new file mode 100644 index 000000000000..869bdce05140 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/_schema_analysis_synonyms_english.json @@ -0,0 +1,11 @@ +{ + "initArgs":{ + "ignoreCase":true, + "format":"solr" + }, + "managedMap":{ + "GB":["GiB","Gigabyte"], + "happy":["glad","joyful"], + "TV":["Television"] + } +} diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/currency.xml b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/currency.xml new file mode 100644 index 000000000000..3a9c58afee88 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/currency.xml @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/elevate.xml b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/elevate.xml new file mode 100644 index 000000000000..b4072d04f914 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/elevate.xml @@ -0,0 +1,42 @@ + + + + + + + + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_ca.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_ca.txt new file mode 100644 index 000000000000..307a85f913d4 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_fr.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_fr.txt new file mode 100644 index 000000000000..f1bba51b23e1 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_fr.txt @@ -0,0 +1,15 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j +d +c +jusqu +quoiqu +lorsqu +puisqu diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_ga.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_ga.txt new file mode 100644 index 000000000000..9ebe7fa349a3 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_it.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_it.txt new file mode 100644 index 000000000000..cac040953726 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/hyphenations_ga.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/hyphenations_ga.txt new file mode 100644 index 000000000000..4d2642cc5a37 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stemdict_nl.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stemdict_nl.txt new file mode 100644 index 000000000000..441072971d33 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stoptags_ja.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stoptags_ja.txt new file mode 100644 index 000000000000..71b750845e37 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ar.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ar.txt new file mode 100644 index 000000000000..046829db6a26 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_bg.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_bg.txt new file mode 100644 index 000000000000..1ae4ba2ae38b --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ca.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ca.txt new file mode 100644 index 000000000000..3da65deafe14 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ckb.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ckb.txt new file mode 100644 index 000000000000..87abf118fec6 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ckb.txt @@ -0,0 +1,136 @@ +# set of kurdish stopwords +# note these have been normalized with our scheme (e represented with U+06D5, etc) +# constructed from: +# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al) +# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston) +# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc + +# and +و +# which +کە +# of +ی +# made/did +کرد +# that/which +ئەوەی +# on/head +سەر +# two +دوو +# also +هەروەها +# from/that +لەو +# makes/does +دەکات +# some +چەند +# every +هەر + +# demonstratives +# that +ئەو +# this +ئەم + +# personal pronouns +# I +من +# we +ئێمە +# you +تۆ +# you +ئێوە +# he/she/it +ئەو +# they +ئەوان + +# prepositions +# to/with/by +بە +پێ +# without +بەبێ +# along with/while/during +بەدەم +# in the opinion of +بەلای +# according to +بەپێی +# before +بەرلە +# in the direction of +بەرەوی +# in front of/toward +بەرەوە +# before/in the face of +بەردەم +# without +بێ +# except for +بێجگە +# for +بۆ +# on/in +دە +تێ +# with +دەگەڵ +# after +دوای +# except for/aside from +جگە +# in/from +لە +لێ +# in front of/before/because of +لەبەر +# between/among +لەبەینی +# concerning/about +لەبابەت +# concerning +لەبارەی +# instead of +لەباتی +# beside +لەبن +# instead of +لەبرێتی +# behind +لەدەم +# with/together with +لەگەڵ +# by +لەلایەن +# within +لەناو +# between/among +لەنێو +# for the sake of +لەپێناوی +# with respect to +لەرەوی +# by means of/for +لەرێ +# for the sake of +لەرێگا +# on/on top of/according to +لەسەر +# under +لەژێر +# between/among +ناو +# between/among +نێوان +# after +پاش +# before +پێش +# like +وەک diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_cz.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_cz.txt new file mode 100644 index 000000000000..53c6097dac7e --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_da.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_da.txt new file mode 100644 index 000000000000..42e6145b98e9 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_da.txt @@ -0,0 +1,110 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. +vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_de.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_de.txt new file mode 100644 index 000000000000..86525e7ae082 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_de.txt @@ -0,0 +1,294 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. + + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_el.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_el.txt new file mode 100644 index 000000000000..232681f5bd6d --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_en.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_en.txt new file mode 100644 index 000000000000..2c164c0b2a1e --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_es.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_es.txt new file mode 100644 index 000000000000..487d78c8d560 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_es.txt @@ -0,0 +1,356 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. + +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? +unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería +serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_et.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_et.txt new file mode 100644 index 000000000000..1b06a134b9a3 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_et.txt @@ -0,0 +1,1603 @@ +# Estonian stopwords list +all +alla +allapoole +allpool +alt +altpoolt +eel +eespool +enne +hommikupoole +hoolimata +ilma +kaudu +keset +kesk +kohe +koos +kuhupoole +kuni +kuspool +kustpoolt +kõige +käsikäes +lappi +ligi +läbi +mööda +paitsi +peale +pealepoole +pealpool +pealt +pealtpoolt +piki +pikku +piku +pikuti +põiki +pärast +päri +risti +sealpool +sealtpoolt +seespool +seltsis +siiapoole +siinpool +siitpoolt +sinnapoole +sissepoole +taga +tagantpoolt +tagapidi +tagapool +taha +tahapoole +teispool +teispoole +tänu +tükkis +vaatamata +vastu +väljapoole +väljaspool +väljastpoolt +õhtupoole +ühes +ühestükis +ühestükkis +ülalpool +ülaltpoolt +üle +ülespoole +ülevalpool +ülevaltpoolt +ümber +ümbert +aegu +aegus +alguks +algul +algule +algult +alguni +all +alla +alt +alul +alutsi +arvel +asemel +asemele +eel +eeli +ees +eesotsas +eest +eestotsast +esitsi +ette +etteotsa +haaval +heaks +hoolimata +hulgas +hulgast +hulka +jalgu +jalus +jalust +jaoks +jooksul +juurde +juures +juurest +jälil +jälile +järel +järele +järelt +järgi +kaasas +kallal +kallale +kallalt +kamul +kannul +kannule +kannult +kaudu +kaupa +keskel +keskele +keskelt +keskis +keskpaiku +kestel +kestes +kilda +killas +killast +kimpu +kimpus +kiuste +kohal +kohale +kohalt +kohaselt +kohe +kohta +koos +korral +kukil +kukile +kukilt +kulul +kõrva +kõrval +kõrvale +kõrvalt +kõrvas +kõrvast +käekõrval +käekõrvale +käekõrvalt +käes +käest +kätte +külge +küljes +küljest +küüsi +küüsis +küüsist +ligi +ligidal +ligidale +ligidalt +aegu +aegus +alguks +algul +algule +algult +alguni +all +alla +alt +alul +alutsi +arvel +asemel +asemele +eel +eeli +ees +eesotsas +eest +eestotsast +esitsi +ette +etteotsa +haaval +heaks +hoolimata +hulgas +hulgast +hulka +jalgu +jalus +jalust +jaoks +jooksul +juurde +juures +juurest +jälil +jälile +järel +järele +järelt +järgi +kaasas +kallal +kallale +kallalt +kamul +kannul +kannule +kannult +kaudu +kaupa +keskel +keskele +keskelt +keskis +keskpaiku +kestel +kestes +kilda +killas +killast +kimpu +kimpus +kiuste +kohal +kohale +kohalt +kohaselt +kohe +kohta +koos +korral +kukil +kukile +kukilt +kulul +kõrva +kõrval +kõrvale +kõrvalt +kõrvas +kõrvast +käekõrval +käekõrvale +käekõrvalt +käes +käest +kätte +külge +küljes +küljest +küüsi +küüsis +küüsist +ligi +ligidal +ligidale +ligidalt +lool +läbi +lähedal +lähedale +lähedalt +man +mant +manu +meelest +mööda +nahas +nahka +nahkas +najal +najale +najalt +nõjal +nõjale +otsa +otsas +otsast +paigale +paigu +paiku +peal +peale +pealt +perra +perrä +pidi +pihta +piki +pikku +pool +poole +poolest +poolt +puhul +puksiiris +pähe +päralt +päras +pärast +päri +ringi +ringis +risust +saadetusel +saadik +saatel +saati +seas +seast +sees +seest +sekka +seljataga +seltsi +seltsis +seltsist +sisse +slepis +suhtes +šlepis +taga +tagant +tagantotsast +tagaotsas +tagaselja +tagasi +tagast +tagutsi +taha +tahaotsa +takka +tarvis +tasa +tuuri +tuuris +tõttu +tükkis +uhal +vaatamata +vahel +vahele +vahelt +vahepeal +vahepeale +vahepealt +vahetsi +varal +varale +varul +vastas +vastast +vastu +veerde +veeres +viisi +võidu +võrd +võrdki +võrra +võrragi +väel +väele +vältel +väärt +väärtki +äärde +ääre +ääres +äärest +ühes +üle +ümber +ümbert +a +abil +aina +ainult +alalt +alates +alati +alles +b +c +d +e +eales +ealeski +edasi +edaspidi +eelkõige +eemal +ei +eks +end +enda +enese +ennem +esialgu +f +g +h +hoopis +i +iganes +igatahes +igati +iial +iialgi +ikka +ikkagi +ilmaski +iseenda +iseenese +iseenesest +isegi +j +jah +ju +juba +juhul +just +järelikult +k +ka +kah +kas +kasvõi +keda +kestahes +kogu +koguni +kohati +kokku +kuhu +kuhugi +kuidagi +kuidas +kunagi +kus +kusagil +kusjuures +kuskil +kust +kõigepealt +küll +l +liiga +lisaks +m +miks +mil +millal +millalgi +mispärast +mistahes +mistõttu +mitte +muide +muidu +muidugi +muist +mujal +mujale +mujalt +mõlemad +mõnda +mõne +mõnikord +n +nii +niikaua +niimoodi +niipaljuke +niisama +niisiis +niivõrd +nõnda +nüüd +o +omaette +omakorda +omavahel +ometi +p +palju +paljuke +palju-palju +peaaegu +peagi +peamiselt +pigem +pisut +praegu +päris +r +rohkem +s +samas +samuti +seal +sealt +sedakorda +sedapuhku +seega +seejuures +seejärel +seekord +seepärast +seetõttu +sellepärast +seni +sestap +siia +siiani +siin +siinkohal +siis +siiski +siit +sinna +suht +š +z +ž +t +teel +teineteise +tõesti +täiesti +u +umbes +v +w +veel +veelgi +vist +võibolla +võib-olla +väga +vähemalt +välja +väljas +väljast +õ +ä +ära +ö +ü +ühtlasi +üksi +ükskõik +ülal +ülale +ülalt +üles +ülesse +üleval +ülevalt +ülimalt +üsna +x +y +aga +ega +ehk +ehkki +elik +ellik +enge +ennegu +ent +et +ja +justkui +kui +kuid +kuigi +kuivõrd +kuna +kuni +kut +mistab +muudkui +nagu +nigu +ning +olgugi +otsekui +otsenagu +selmet +sest +sestab +vaid +või +aa +adaa +adjöö +ae +ah +ahaa +ahah +ah-ah-ah +ah-haa +ahoi +ai +aidaa +aidu-raidu +aih +aijeh +aituma +aitäh +aitüma +ammuu +amps +ampsti +aptsih +ass +at +ata +at-at-at +atsih +atsihh +auh +bai-bai +bingo +braavo +brr +ee +eeh +eh +ehee +eheh +eh-eh-hee +eh-eh-ee +ehei +ehh +ehhee +einoh +ena +ennäe +ennäh +fuh +fui +fuih +haa +hah +hahaa +hah-hah-hah +halleluuja +hallo +halloo +hass +hee +heh +he-he-hee +hei +heldeke(ne) +heureka +hihii +hip-hip-hurraa +hmh +hmjah +hoh-hoh-hoo +hohoo +hoi +hollallaa +hoo +hoplaa +hopp +hops +hopsassaa +hopsti +hosianna +huh +huidii +huist +hurjah +hurjeh +hurjoh +hurjuh +hurraa +huu +hõhõh +hõi +hõissa +hõissassa +hõk +hõkk +häh +hä-hä-hää +hüvasti +ih-ah-haa +ih-ih-hii +ii-ha-ha +issake +issakene +isver +jaa-ah +ja-ah +jaah +janäe +jeeh +jeerum +jeever +jessas +jestas +juhhei +jumalaga +jumalime +jumaluke +jumalukene +jutas +kaaps +kaapsti +kaasike +kae +kalps +kalpsti +kannäe +kanäe +kappadi +kaps +kapsti +karkõmm +karkäuh +karkääks +karkääksti +karmauh +karmauhti +karnaps +karnapsti +karniuhti +karpartsaki +karpauh +karpauhti +karplauh +karplauhti +karprauh +karprauhti +karsumdi +karsumm +kartsumdi +kartsumm +karviuh +karviuhti +kaske +kassa +kauh +kauhti +keh +keksti +kepsti +khe +khm +kih +kiiks +kiiksti +kiis +kiiss +kikerii +kikerikii +kili +kilk +kilk-kõlk +kilks +kilks-kolks +kilks-kõlks +kill +killadi +killadi|-kolladi +killadi-kõlladi +killa-kolla +killa-kõlla +kill-kõll +kimps-komps +kipp +kips-kõps +kiriküüt +kirra-kõrra +kirr-kõrr +kirts +klaps +klapsti +klirdi +klirr +klonks +klops +klopsti +kluk +klu-kluu +klõks +klõksti +klõmdi +klõmm +klõmpsti +klõnks +klõnksti +klõps +klõpsti +kläu +kohva-kohva +kok +koks +koksti +kolaki +kolk +kolks +kolksti +koll +kolladi +komp +komps +kompsti +kop +kopp +koppadi +kops +kopsti +kossu +kotsu +kraa +kraak +kraaks +kraaps +kraapsti +krahh +kraks +kraksti +kraps +krapsti +krauh +krauhti +kriiks +kriiksti +kriips +kriips-kraaps +kripa-krõpa +krips-kraps +kriuh +kriuks +kriuksti +kromps +kronk +kronks +krooks +kruu +krõks +krõksti +krõpa +krõps +krõpsti +krõuh +kräu +kräuh +kräuhti +kräuks +kss +kukeleegu +kukku +kuku +kulu +kurluu +kurnäu +kuss +kussu +kõks +kõksti +kõldi +kõlks +kõlksti +kõll +kõmaki +kõmdi +kõmm +kõmps +kõpp +kõps +kõpsadi +kõpsat +kõpsti +kõrr +kõrra-kõrra +kõss +kõtt +kõõksti +kärr +kärts +kärtsti +käuks +käuksti +kääga +kääks +kääksti +köh +köki-möki +köksti +laks +laksti +lampsti +larts +lartsti +lats +latsti +leelo +legoo +lehva +liiri-lõõri +lika-lõka +likat-lõkat +limpsti +lips +lipsti +lirts +lirtsaki +lirtsti +lonksti +lops +lopsti +lorts +lortsti +luks +lups +lupsti +lurts +lurtsti +lõks +lõksti +lõmps +lõmpsti +lõnks +lõnksti +lärts +lärtsti +läts +lätsti +lörts +lörtsti +lötsti +lööps +lööpsti +marss +mats +matsti +mauh +mauhti +mh +mhh +mhmh +miau +mjaa +mkm +m-mh +mnjaa +mnjah +moens +mulks +mulksti +mull-mull +mull-mull-mull +muu +muuh +mõh +mõmm +mäh +mäts +mäu +mää +möh +möh-öh-ää +möö +müh-müh +mühüh +müks +müksti +müraki +mürr +mürts +mürtsaki +mürtsti +mütaku +müta-mäta +müta-müta +müt-müt +müt-müt-müt +müts +mütsti +mütt +naa +naah +nah +naks +naksti +nanuu +naps +napsti +nilpsti +nipsti +nirr +niuh +niuh-näuh +niuhti +noh +noksti +nolpsti +nonoh +nonoo +nonäh +noo +nooh +nooks +norr +nurr +nuuts +nõh +nõhh +nõka-nõka +nõks +nõksat-nõksat +nõks-nõks +nõksti +nõõ +nõõh +näeh +näh +nälpsti +nämm-nämm +näpsti +näts +nätsti +näu +näuh +näuhti +näuks +näuksti +nääh +nääks +nühkat-nühkat +oeh +oh +ohh +ohhh +oh-hoi +oh-hoo +ohoh +oh-oh-oo +oh-oh-hoo +ohoi +ohoo +oi +oih +oijee +oijeh +oo +ooh +oo-oh +oo-ohh +oot +ossa +ot +paa +pah +pahh +pakaa +pamm +pantsti +pardon +pardonks +parlartsti +parts +partsti +partsumdi +partsumm +pastoi +pats +patst +patsti +pau +pauh +pauhti +pele +pfui +phuh +phuuh +phäh +phähh +piiks +piip +piiri-pääri +pimm +pimm-pamm +pimm-pomm +pimm-põmm +piraki +piuks +piu-pau +plaks +plaksti +plarts +plartsti +plats +platsti +plauh +plauhh +plauhti +pliks +pliks-plaks +plinn +pliraki +plirts +plirtsti +pliu +pliuh +ploks +plotsti +plumps +plumpsti +plõks +plõksti +plõmdi +plõmm +plõnn +plärr +plärts +plärtsat +plärtsti +pläu +pläuh +plää +plörtsat +pomm +popp +pops +popsti +ports +pot +pots +potsti +pott +praks +praksti +prants +prantsaki +prantsti +prassai +prauh +prauhh +prauhti +priks +priuh +priuhh +priuh-prauh +proosit +proost +prr +prrr +prõks +prõksti +prõmdi +prõmm +prõntsti +prääk +prääks +pst +psst +ptrr +ptruu +ptüi +puh +puhh +puksti +pumm +pumps +pup-pup-pup +purts +puuh +põks +põksti +põmdi +põmm +põmmadi +põnks +põnn +põnnadi +põnt +põnts +põntsti +põraki +põrr +põrra-põrra +päh +pähh +päntsti +pää +pöörd +püh +raks +raksti +raps +rapsti +ratataa +rauh +riips +riipsti +riks +riks-raks +rips-raps +rivitult +robaki +rops +ropsaki +ropsti +ruik +räntsti +räts +röh +röhh +sah +sahh +sahkat +saps +sapsti +sauh +sauhti +servus +sihkadi-sahkadi +sihka-sahka +sihkat-sahkat +silks +silk-solk +sips +sipsti +sirr +sirr-sorr +sirts +sirtsti +siu +siuh +siuh-sauh +siuh-säuh +siuhti +siuks +siuts +skool +so +soh +solks +solksti +solpsti +soo +sooh +so-oh +soo-oh +sopp +sops +sopsti +sorr +sorts +sortsti +so-soo +soss +soss-soss +ss +sss +sst +stopp +suhkat-sahkat +sulk +sulks +sulksti +sull +sulla-sulla +sulpa-sulpa +sulps +sulpsti +sumaki +sumdi +summ +summat-summat +sups +supsaku +supsti +surts +surtsti +suss +susti +suts +sutsti +säh +sähke +särts +särtsti +säu +säuh +säuhti +taevake +taevakene +takk +tere +terekest +tibi-tibi +tikk-takk +tiks +tilk +tilks +till +tilla-talla +till-tall +tilulii +tinn +tip +tip-tap +tirr +tirtsti +tiu +tjaa +tjah +tohhoh +tohhoo +tohoh +tohoo +tok +tokk +toks +toksti +tonks +tonksti +tota +totsti +tot-tot +tprr +tpruu +trah +trahh +trallallaa +trill +trillallaa +trr +trrr +tsah +tsahh +tsilk +tsilk-tsolk +tsirr +tsiuh +tskae +tsolk +tss +tst +tsst +tsuhh +tsuk +tsumm +tsurr +tsäuh +tšao +tšš +tššš +tuk +tuks +turts +turtsti +tutki +tutkit +tutu-lutu +tutulutu +tuut +tuutu-luutu +tõks +tötsti +tümps +uh +uhh +uh-huu +uhtsa +uhtsaa +uhuh +uhuu +ui +uih +uih-aih +uijah +uijeh +uist +uit +uka +upsti +uraa +urjah +urjeh +urjoh +urjuh +urr +urraa +ust +utu +uu +uuh +vaak +vaat +vae +vaeh +vai +vat +vau +vhüüt +vidiit +viiks +vilks +vilksti +vinki-vinki +virdi +virr +viu +viudi +viuh +viuhti +voeh +voh +vohh +volks +volksti +vooh +vops +vopsti +vot +vuh +vuhti +vuih +vulks +vulksti +vull +vulpsti +vups +vupsaki +vupsaku +vupsti +vurdi +vurr +vurra-vurra +vurts +vurtsti +vutt +võe +võeh +või +võih +võrr +võts +võtt +vääks +õe +õits +õk +õkk +õrr +õss +õuh +äh +ähh +ähhähhää +äh-hää +äh-äh-hää +äiu +äiu-ää +äss +ää +ääh +äähh +öh +öhh +ök +üh +eelmine +eikeegi +eimiski +emb-kumb +enam +enim +iga +igasugune +igaüks +ise +isesugune +järgmine +keegi +kes +kumb +kumbki +kõik +meiesugune +meietaoline +midagi +mihuke +mihukene +milletaoline +milline +mina +minake +mingi +mingisugune +minusugune +minutaoline +mis +miski +miskisugune +missugune +misuke +mitmes +mitmesugune +mitu +mitu-mitu +mitu-setu +muu +mõlema +mõnesugune +mõni +mõningane +mõningas +mäherdune +määrane +naasugune +need +nemad +nendesugune +nendetaoline +nihuke +nihukene +niimitu +niisamasugune +niisugune +nisuke +nisukene +oma +omaenese +omasugune +omataoline +pool +praegune +sama +samasugune +samataoline +see +seesama +seesamane +seesamune +seesinane +seesugune +selline +sihuke +sihukene +sina +sinusugune +sinutaoline +siuke +siukene +säherdune +säärane +taoline +teiesugune +teine +teistsugune +tema +temake +temakene +temasugune +temataoline +too +toosama +toosamane +üks +üksteise +hakkama +minema +olema +pidama +saama +tegema +tulema +võima diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_eu.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_eu.txt new file mode 100644 index 000000000000..25f1db934606 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fa.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fa.txt new file mode 100644 index 000000000000..723641c6da78 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fi.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fi.txt new file mode 100644 index 000000000000..4372c9a055bb --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fi.txt @@ -0,0 +1,97 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fr.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fr.txt new file mode 100644 index 000000000000..749abae6846c --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_fr.txt @@ -0,0 +1,186 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +cela | that +celà | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ga.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ga.txt new file mode 100644 index 000000000000..9ff88d747e57 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_gl.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_gl.txt new file mode 100644 index 000000000000..d8760b12c144 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hi.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hi.txt new file mode 100644 index 000000000000..86286bb083be --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hu.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hu.txt new file mode 100644 index 000000000000..37526da8aa98 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hu.txt @@ -0,0 +1,211 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hy.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hy.txt new file mode 100644 index 000000000000..60c1c50fbc83 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. +այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_id.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_id.txt new file mode 100644 index 000000000000..4617f83a5c5c --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_it.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_it.txt new file mode 100644 index 000000000000..1219cc773ab4 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_it.txt @@ -0,0 +1,303 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ja.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ja.txt new file mode 100644 index 000000000000..d4321be6b164 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. +# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_lv.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_lv.txt new file mode 100644 index 000000000000..e21a23c06c35 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_nl.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_nl.txt new file mode 100644 index 000000000000..47a2aeacf6f2 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_nl.txt @@ -0,0 +1,119 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_no.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_no.txt new file mode 100644 index 000000000000..a7a2c28ba544 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_no.txt @@ -0,0 +1,194 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard , Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) * +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_pt.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_pt.txt new file mode 100644 index 000000000000..acfeb01af6bd --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_pt.txt @@ -0,0 +1,253 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. + +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. + +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ro.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ro.txt new file mode 100644 index 000000000000..4fdee90a5ba3 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ru.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ru.txt new file mode 100644 index 000000000000..55271400c643 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_ru.txt @@ -0,0 +1,243 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_sv.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_sv.txt new file mode 100644 index 000000000000..096f87f6766f --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_sv.txt @@ -0,0 +1,133 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_th.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_th.txt new file mode 100644 index 000000000000..07f0fabe6920 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_tr.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_tr.txt new file mode 100644 index 000000000000..84d9408d4ea3 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/userdict_ja.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/userdict_ja.txt new file mode 100644 index 000000000000..6f0368e4d817 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# , ... , ... , +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/managed-schema b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/managed-schema new file mode 100644 index 000000000000..3c4abfccc776 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/managed-schema @@ -0,0 +1,1202 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/mapping-FoldToASCII.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/mapping-FoldToASCII.txt new file mode 100644 index 000000000000..9a84b6eac341 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/mapping-FoldToASCII.txt @@ -0,0 +1,3813 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This map converts alphabetic, numeric, and symbolic Unicode characters +# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode +# block) into their ASCII equivalents, if one exists. +# +# Characters from the following Unicode blocks are converted; however, only +# those characters with reasonable ASCII alternatives are converted: +# +# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf +# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf +# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf +# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf +# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf +# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf +# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf +# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf +# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf +# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf +# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf +# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf +# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf +# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf +# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf +# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf +# +# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode +# +# The set of character conversions supported by this map is a superset of +# those supported by the map represented by mapping-ISOLatin1Accent.txt. +# +# See the bottom of this file for the Perl script used to generate the contents +# of this file (without this header) from ASCIIFoldingFilter.java. + + +# Syntax: +# "source" => "target" +# "source".length() > 0 (source cannot be empty.) +# "target".length() >= 0 (target can be empty.) + + +# À [LATIN CAPITAL LETTER A WITH GRAVE] +"\u00C0" => "A" + +# Á [LATIN CAPITAL LETTER A WITH ACUTE] +"\u00C1" => "A" + +#  [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] +"\u00C2" => "A" + +# à [LATIN CAPITAL LETTER A WITH TILDE] +"\u00C3" => "A" + +# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS] +"\u00C4" => "A" + +# Å [LATIN CAPITAL LETTER A WITH RING ABOVE] +"\u00C5" => "A" + +# Ā [LATIN CAPITAL LETTER A WITH MACRON] +"\u0100" => "A" + +# Ă [LATIN CAPITAL LETTER A WITH BREVE] +"\u0102" => "A" + +# Ą [LATIN CAPITAL LETTER A WITH OGONEK] +"\u0104" => "A" + +# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA] +"\u018F" => "A" + +# Ǎ [LATIN CAPITAL LETTER A WITH CARON] +"\u01CD" => "A" + +# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] +"\u01DE" => "A" + +# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] +"\u01E0" => "A" + +# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] +"\u01FA" => "A" + +# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] +"\u0200" => "A" + +# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE] +"\u0202" => "A" + +# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE] +"\u0226" => "A" + +# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE] +"\u023A" => "A" + +# ᴀ [LATIN LETTER SMALL CAPITAL A] +"\u1D00" => "A" + +# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW] +"\u1E00" => "A" + +# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW] +"\u1EA0" => "A" + +# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE] +"\u1EA2" => "A" + +# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] +"\u1EA4" => "A" + +# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] +"\u1EA6" => "A" + +# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1EA8" => "A" + +# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] +"\u1EAA" => "A" + +# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] +"\u1EAC" => "A" + +# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] +"\u1EAE" => "A" + +# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] +"\u1EB0" => "A" + +# Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] +"\u1EB2" => "A" + +# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] +"\u1EB4" => "A" + +# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] +"\u1EB6" => "A" + +# Ⓐ [CIRCLED LATIN CAPITAL LETTER A] +"\u24B6" => "A" + +# A [FULLWIDTH LATIN CAPITAL LETTER A] +"\uFF21" => "A" + +# à [LATIN SMALL LETTER A WITH GRAVE] +"\u00E0" => "a" + +# á [LATIN SMALL LETTER A WITH ACUTE] +"\u00E1" => "a" + +# â [LATIN SMALL LETTER A WITH CIRCUMFLEX] +"\u00E2" => "a" + +# ã [LATIN SMALL LETTER A WITH TILDE] +"\u00E3" => "a" + +# ä [LATIN SMALL LETTER A WITH DIAERESIS] +"\u00E4" => "a" + +# å [LATIN SMALL LETTER A WITH RING ABOVE] +"\u00E5" => "a" + +# ā [LATIN SMALL LETTER A WITH MACRON] +"\u0101" => "a" + +# ă [LATIN SMALL LETTER A WITH BREVE] +"\u0103" => "a" + +# ą [LATIN SMALL LETTER A WITH OGONEK] +"\u0105" => "a" + +# ǎ [LATIN SMALL LETTER A WITH CARON] +"\u01CE" => "a" + +# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] +"\u01DF" => "a" + +# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] +"\u01E1" => "a" + +# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] +"\u01FB" => "a" + +# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE] +"\u0201" => "a" + +# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE] +"\u0203" => "a" + +# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE] +"\u0227" => "a" + +# ɐ [LATIN SMALL LETTER TURNED A] +"\u0250" => "a" + +# ə [LATIN SMALL LETTER SCHWA] +"\u0259" => "a" + +# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK] +"\u025A" => "a" + +# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK] +"\u1D8F" => "a" + +# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] +"\u1D95" => "a" + +# ạ [LATIN SMALL LETTER A WITH RING BELOW] +"\u1E01" => "a" + +# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING] +"\u1E9A" => "a" + +# ạ [LATIN SMALL LETTER A WITH DOT BELOW] +"\u1EA1" => "a" + +# ả [LATIN SMALL LETTER A WITH HOOK ABOVE] +"\u1EA3" => "a" + +# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] +"\u1EA5" => "a" + +# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] +"\u1EA7" => "a" + +# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1EA9" => "a" + +# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] +"\u1EAB" => "a" + +# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] +"\u1EAD" => "a" + +# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE] +"\u1EAF" => "a" + +# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE] +"\u1EB1" => "a" + +# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] +"\u1EB3" => "a" + +# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE] +"\u1EB5" => "a" + +# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] +"\u1EB7" => "a" + +# ₐ [LATIN SUBSCRIPT SMALL LETTER A] +"\u2090" => "a" + +# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA] +"\u2094" => "a" + +# ⓐ [CIRCLED LATIN SMALL LETTER A] +"\u24D0" => "a" + +# ⱥ [LATIN SMALL LETTER A WITH STROKE] +"\u2C65" => "a" + +# Ɐ [LATIN CAPITAL LETTER TURNED A] +"\u2C6F" => "a" + +# a [FULLWIDTH LATIN SMALL LETTER A] +"\uFF41" => "a" + +# Ꜳ [LATIN CAPITAL LETTER AA] +"\uA732" => "AA" + +# Æ [LATIN CAPITAL LETTER AE] +"\u00C6" => "AE" + +# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON] +"\u01E2" => "AE" + +# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE] +"\u01FC" => "AE" + +# ᴁ [LATIN LETTER SMALL CAPITAL AE] +"\u1D01" => "AE" + +# Ꜵ [LATIN CAPITAL LETTER AO] +"\uA734" => "AO" + +# Ꜷ [LATIN CAPITAL LETTER AU] +"\uA736" => "AU" + +# Ꜹ [LATIN CAPITAL LETTER AV] +"\uA738" => "AV" + +# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] +"\uA73A" => "AV" + +# Ꜽ [LATIN CAPITAL LETTER AY] +"\uA73C" => "AY" + +# ⒜ [PARENTHESIZED LATIN SMALL LETTER A] +"\u249C" => "(a)" + +# ꜳ [LATIN SMALL LETTER AA] +"\uA733" => "aa" + +# æ [LATIN SMALL LETTER AE] +"\u00E6" => "ae" + +# ǣ [LATIN SMALL LETTER AE WITH MACRON] +"\u01E3" => "ae" + +# ǽ [LATIN SMALL LETTER AE WITH ACUTE] +"\u01FD" => "ae" + +# ᴂ [LATIN SMALL LETTER TURNED AE] +"\u1D02" => "ae" + +# ꜵ [LATIN SMALL LETTER AO] +"\uA735" => "ao" + +# ꜷ [LATIN SMALL LETTER AU] +"\uA737" => "au" + +# ꜹ [LATIN SMALL LETTER AV] +"\uA739" => "av" + +# ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] +"\uA73B" => "av" + +# ꜽ [LATIN SMALL LETTER AY] +"\uA73D" => "ay" + +# Ɓ [LATIN CAPITAL LETTER B WITH HOOK] +"\u0181" => "B" + +# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR] +"\u0182" => "B" + +# Ƀ [LATIN CAPITAL LETTER B WITH STROKE] +"\u0243" => "B" + +# ʙ [LATIN LETTER SMALL CAPITAL B] +"\u0299" => "B" + +# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B] +"\u1D03" => "B" + +# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE] +"\u1E02" => "B" + +# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW] +"\u1E04" => "B" + +# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW] +"\u1E06" => "B" + +# Ⓑ [CIRCLED LATIN CAPITAL LETTER B] +"\u24B7" => "B" + +# B [FULLWIDTH LATIN CAPITAL LETTER B] +"\uFF22" => "B" + +# ƀ [LATIN SMALL LETTER B WITH STROKE] +"\u0180" => "b" + +# ƃ [LATIN SMALL LETTER B WITH TOPBAR] +"\u0183" => "b" + +# ɓ [LATIN SMALL LETTER B WITH HOOK] +"\u0253" => "b" + +# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE] +"\u1D6C" => "b" + +# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK] +"\u1D80" => "b" + +# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE] +"\u1E03" => "b" + +# ḅ [LATIN SMALL LETTER B WITH DOT BELOW] +"\u1E05" => "b" + +# ḇ [LATIN SMALL LETTER B WITH LINE BELOW] +"\u1E07" => "b" + +# ⓑ [CIRCLED LATIN SMALL LETTER B] +"\u24D1" => "b" + +# b [FULLWIDTH LATIN SMALL LETTER B] +"\uFF42" => "b" + +# ⒝ [PARENTHESIZED LATIN SMALL LETTER B] +"\u249D" => "(b)" + +# Ç [LATIN CAPITAL LETTER C WITH CEDILLA] +"\u00C7" => "C" + +# Ć [LATIN CAPITAL LETTER C WITH ACUTE] +"\u0106" => "C" + +# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] +"\u0108" => "C" + +# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE] +"\u010A" => "C" + +# Č [LATIN CAPITAL LETTER C WITH CARON] +"\u010C" => "C" + +# Ƈ [LATIN CAPITAL LETTER C WITH HOOK] +"\u0187" => "C" + +# Ȼ [LATIN CAPITAL LETTER C WITH STROKE] +"\u023B" => "C" + +# ʗ [LATIN LETTER STRETCHED C] +"\u0297" => "C" + +# ᴄ [LATIN LETTER SMALL CAPITAL C] +"\u1D04" => "C" + +# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] +"\u1E08" => "C" + +# Ⓒ [CIRCLED LATIN CAPITAL LETTER C] +"\u24B8" => "C" + +# C [FULLWIDTH LATIN CAPITAL LETTER C] +"\uFF23" => "C" + +# ç [LATIN SMALL LETTER C WITH CEDILLA] +"\u00E7" => "c" + +# ć [LATIN SMALL LETTER C WITH ACUTE] +"\u0107" => "c" + +# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX] +"\u0109" => "c" + +# ċ [LATIN SMALL LETTER C WITH DOT ABOVE] +"\u010B" => "c" + +# č [LATIN SMALL LETTER C WITH CARON] +"\u010D" => "c" + +# ƈ [LATIN SMALL LETTER C WITH HOOK] +"\u0188" => "c" + +# ȼ [LATIN SMALL LETTER C WITH STROKE] +"\u023C" => "c" + +# ɕ [LATIN SMALL LETTER C WITH CURL] +"\u0255" => "c" + +# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] +"\u1E09" => "c" + +# ↄ [LATIN SMALL LETTER REVERSED C] +"\u2184" => "c" + +# ⓒ [CIRCLED LATIN SMALL LETTER C] +"\u24D2" => "c" + +# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT] +"\uA73E" => "c" + +# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT] +"\uA73F" => "c" + +# c [FULLWIDTH LATIN SMALL LETTER C] +"\uFF43" => "c" + +# ⒞ [PARENTHESIZED LATIN SMALL LETTER C] +"\u249E" => "(c)" + +# Ð [LATIN CAPITAL LETTER ETH] +"\u00D0" => "D" + +# Ď [LATIN CAPITAL LETTER D WITH CARON] +"\u010E" => "D" + +# Đ [LATIN CAPITAL LETTER D WITH STROKE] +"\u0110" => "D" + +# Ɖ [LATIN CAPITAL LETTER AFRICAN D] +"\u0189" => "D" + +# Ɗ [LATIN CAPITAL LETTER D WITH HOOK] +"\u018A" => "D" + +# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR] +"\u018B" => "D" + +# ᴅ [LATIN LETTER SMALL CAPITAL D] +"\u1D05" => "D" + +# ᴆ [LATIN LETTER SMALL CAPITAL ETH] +"\u1D06" => "D" + +# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE] +"\u1E0A" => "D" + +# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW] +"\u1E0C" => "D" + +# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW] +"\u1E0E" => "D" + +# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA] +"\u1E10" => "D" + +# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] +"\u1E12" => "D" + +# Ⓓ [CIRCLED LATIN CAPITAL LETTER D] +"\u24B9" => "D" + +# Ꝺ [LATIN CAPITAL LETTER INSULAR D] +"\uA779" => "D" + +# D [FULLWIDTH LATIN CAPITAL LETTER D] +"\uFF24" => "D" + +# ð [LATIN SMALL LETTER ETH] +"\u00F0" => "d" + +# ď [LATIN SMALL LETTER D WITH CARON] +"\u010F" => "d" + +# đ [LATIN SMALL LETTER D WITH STROKE] +"\u0111" => "d" + +# ƌ [LATIN SMALL LETTER D WITH TOPBAR] +"\u018C" => "d" + +# ȡ [LATIN SMALL LETTER D WITH CURL] +"\u0221" => "d" + +# ɖ [LATIN SMALL LETTER D WITH TAIL] +"\u0256" => "d" + +# ɗ [LATIN SMALL LETTER D WITH HOOK] +"\u0257" => "d" + +# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE] +"\u1D6D" => "d" + +# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK] +"\u1D81" => "d" + +# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL] +"\u1D91" => "d" + +# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE] +"\u1E0B" => "d" + +# ḍ [LATIN SMALL LETTER D WITH DOT BELOW] +"\u1E0D" => "d" + +# ḏ [LATIN SMALL LETTER D WITH LINE BELOW] +"\u1E0F" => "d" + +# ḑ [LATIN SMALL LETTER D WITH CEDILLA] +"\u1E11" => "d" + +# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] +"\u1E13" => "d" + +# ⓓ [CIRCLED LATIN SMALL LETTER D] +"\u24D3" => "d" + +# ꝺ [LATIN SMALL LETTER INSULAR D] +"\uA77A" => "d" + +# d [FULLWIDTH LATIN SMALL LETTER D] +"\uFF44" => "d" + +# DŽ [LATIN CAPITAL LETTER DZ WITH CARON] +"\u01C4" => "DZ" + +# DZ [LATIN CAPITAL LETTER DZ] +"\u01F1" => "DZ" + +# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] +"\u01C5" => "Dz" + +# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] +"\u01F2" => "Dz" + +# ⒟ [PARENTHESIZED LATIN SMALL LETTER D] +"\u249F" => "(d)" + +# ȸ [LATIN SMALL LETTER DB DIGRAPH] +"\u0238" => "db" + +# dž [LATIN SMALL LETTER DZ WITH CARON] +"\u01C6" => "dz" + +# dz [LATIN SMALL LETTER DZ] +"\u01F3" => "dz" + +# ʣ [LATIN SMALL LETTER DZ DIGRAPH] +"\u02A3" => "dz" + +# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] +"\u02A5" => "dz" + +# È [LATIN CAPITAL LETTER E WITH GRAVE] +"\u00C8" => "E" + +# É [LATIN CAPITAL LETTER E WITH ACUTE] +"\u00C9" => "E" + +# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] +"\u00CA" => "E" + +# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS] +"\u00CB" => "E" + +# Ē [LATIN CAPITAL LETTER E WITH MACRON] +"\u0112" => "E" + +# Ĕ [LATIN CAPITAL LETTER E WITH BREVE] +"\u0114" => "E" + +# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE] +"\u0116" => "E" + +# Ę [LATIN CAPITAL LETTER E WITH OGONEK] +"\u0118" => "E" + +# Ě [LATIN CAPITAL LETTER E WITH CARON] +"\u011A" => "E" + +# Ǝ [LATIN CAPITAL LETTER REVERSED E] +"\u018E" => "E" + +# Ɛ [LATIN CAPITAL LETTER OPEN E] +"\u0190" => "E" + +# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] +"\u0204" => "E" + +# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE] +"\u0206" => "E" + +# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA] +"\u0228" => "E" + +# Ɇ [LATIN CAPITAL LETTER E WITH STROKE] +"\u0246" => "E" + +# ᴇ [LATIN LETTER SMALL CAPITAL E] +"\u1D07" => "E" + +# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] +"\u1E14" => "E" + +# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] +"\u1E16" => "E" + +# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] +"\u1E18" => "E" + +# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW] +"\u1E1A" => "E" + +# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] +"\u1E1C" => "E" + +# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW] +"\u1EB8" => "E" + +# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE] +"\u1EBA" => "E" + +# Ẽ [LATIN CAPITAL LETTER E WITH TILDE] +"\u1EBC" => "E" + +# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] +"\u1EBE" => "E" + +# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] +"\u1EC0" => "E" + +# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1EC2" => "E" + +# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] +"\u1EC4" => "E" + +# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] +"\u1EC6" => "E" + +# Ⓔ [CIRCLED LATIN CAPITAL LETTER E] +"\u24BA" => "E" + +# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E] +"\u2C7B" => "E" + +# E [FULLWIDTH LATIN CAPITAL LETTER E] +"\uFF25" => "E" + +# è [LATIN SMALL LETTER E WITH GRAVE] +"\u00E8" => "e" + +# é [LATIN SMALL LETTER E WITH ACUTE] +"\u00E9" => "e" + +# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX] +"\u00EA" => "e" + +# ë [LATIN SMALL LETTER E WITH DIAERESIS] +"\u00EB" => "e" + +# ē [LATIN SMALL LETTER E WITH MACRON] +"\u0113" => "e" + +# ĕ [LATIN SMALL LETTER E WITH BREVE] +"\u0115" => "e" + +# ė [LATIN SMALL LETTER E WITH DOT ABOVE] +"\u0117" => "e" + +# ę [LATIN SMALL LETTER E WITH OGONEK] +"\u0119" => "e" + +# ě [LATIN SMALL LETTER E WITH CARON] +"\u011B" => "e" + +# ǝ [LATIN SMALL LETTER TURNED E] +"\u01DD" => "e" + +# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE] +"\u0205" => "e" + +# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE] +"\u0207" => "e" + +# ȩ [LATIN SMALL LETTER E WITH CEDILLA] +"\u0229" => "e" + +# ɇ [LATIN SMALL LETTER E WITH STROKE] +"\u0247" => "e" + +# ɘ [LATIN SMALL LETTER REVERSED E] +"\u0258" => "e" + +# ɛ [LATIN SMALL LETTER OPEN E] +"\u025B" => "e" + +# ɜ [LATIN SMALL LETTER REVERSED OPEN E] +"\u025C" => "e" + +# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] +"\u025D" => "e" + +# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E] +"\u025E" => "e" + +# ʚ [LATIN SMALL LETTER CLOSED OPEN E] +"\u029A" => "e" + +# ᴈ [LATIN SMALL LETTER TURNED OPEN E] +"\u1D08" => "e" + +# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK] +"\u1D92" => "e" + +# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] +"\u1D93" => "e" + +# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] +"\u1D94" => "e" + +# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE] +"\u1E15" => "e" + +# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE] +"\u1E17" => "e" + +# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] +"\u1E19" => "e" + +# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW] +"\u1E1B" => "e" + +# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] +"\u1E1D" => "e" + +# ẹ [LATIN SMALL LETTER E WITH DOT BELOW] +"\u1EB9" => "e" + +# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE] +"\u1EBB" => "e" + +# ẽ [LATIN SMALL LETTER E WITH TILDE] +"\u1EBD" => "e" + +# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] +"\u1EBF" => "e" + +# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] +"\u1EC1" => "e" + +# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1EC3" => "e" + +# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] +"\u1EC5" => "e" + +# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] +"\u1EC7" => "e" + +# ₑ [LATIN SUBSCRIPT SMALL LETTER E] +"\u2091" => "e" + +# ⓔ [CIRCLED LATIN SMALL LETTER E] +"\u24D4" => "e" + +# ⱸ [LATIN SMALL LETTER E WITH NOTCH] +"\u2C78" => "e" + +# e [FULLWIDTH LATIN SMALL LETTER E] +"\uFF45" => "e" + +# ⒠ [PARENTHESIZED LATIN SMALL LETTER E] +"\u24A0" => "(e)" + +# Ƒ [LATIN CAPITAL LETTER F WITH HOOK] +"\u0191" => "F" + +# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE] +"\u1E1E" => "F" + +# Ⓕ [CIRCLED LATIN CAPITAL LETTER F] +"\u24BB" => "F" + +# ꜰ [LATIN LETTER SMALL CAPITAL F] +"\uA730" => "F" + +# Ꝼ [LATIN CAPITAL LETTER INSULAR F] +"\uA77B" => "F" + +# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F] +"\uA7FB" => "F" + +# F [FULLWIDTH LATIN CAPITAL LETTER F] +"\uFF26" => "F" + +# ƒ [LATIN SMALL LETTER F WITH HOOK] +"\u0192" => "f" + +# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE] +"\u1D6E" => "f" + +# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK] +"\u1D82" => "f" + +# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE] +"\u1E1F" => "f" + +# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE] +"\u1E9B" => "f" + +# ⓕ [CIRCLED LATIN SMALL LETTER F] +"\u24D5" => "f" + +# ꝼ [LATIN SMALL LETTER INSULAR F] +"\uA77C" => "f" + +# f [FULLWIDTH LATIN SMALL LETTER F] +"\uFF46" => "f" + +# ⒡ [PARENTHESIZED LATIN SMALL LETTER F] +"\u24A1" => "(f)" + +# ff [LATIN SMALL LIGATURE FF] +"\uFB00" => "ff" + +# ffi [LATIN SMALL LIGATURE FFI] +"\uFB03" => "ffi" + +# ffl [LATIN SMALL LIGATURE FFL] +"\uFB04" => "ffl" + +# fi [LATIN SMALL LIGATURE FI] +"\uFB01" => "fi" + +# fl [LATIN SMALL LIGATURE FL] +"\uFB02" => "fl" + +# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] +"\u011C" => "G" + +# Ğ [LATIN CAPITAL LETTER G WITH BREVE] +"\u011E" => "G" + +# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE] +"\u0120" => "G" + +# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA] +"\u0122" => "G" + +# Ɠ [LATIN CAPITAL LETTER G WITH HOOK] +"\u0193" => "G" + +# Ǥ [LATIN CAPITAL LETTER G WITH STROKE] +"\u01E4" => "G" + +# ǥ [LATIN SMALL LETTER G WITH STROKE] +"\u01E5" => "G" + +# Ǧ [LATIN CAPITAL LETTER G WITH CARON] +"\u01E6" => "G" + +# ǧ [LATIN SMALL LETTER G WITH CARON] +"\u01E7" => "G" + +# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE] +"\u01F4" => "G" + +# ɢ [LATIN LETTER SMALL CAPITAL G] +"\u0262" => "G" + +# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK] +"\u029B" => "G" + +# Ḡ [LATIN CAPITAL LETTER G WITH MACRON] +"\u1E20" => "G" + +# Ⓖ [CIRCLED LATIN CAPITAL LETTER G] +"\u24BC" => "G" + +# Ᵹ [LATIN CAPITAL LETTER INSULAR G] +"\uA77D" => "G" + +# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G] +"\uA77E" => "G" + +# G [FULLWIDTH LATIN CAPITAL LETTER G] +"\uFF27" => "G" + +# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX] +"\u011D" => "g" + +# ğ [LATIN SMALL LETTER G WITH BREVE] +"\u011F" => "g" + +# ġ [LATIN SMALL LETTER G WITH DOT ABOVE] +"\u0121" => "g" + +# ģ [LATIN SMALL LETTER G WITH CEDILLA] +"\u0123" => "g" + +# ǵ [LATIN SMALL LETTER G WITH ACUTE] +"\u01F5" => "g" + +# ɠ [LATIN SMALL LETTER G WITH HOOK] +"\u0260" => "g" + +# ɡ [LATIN SMALL LETTER SCRIPT G] +"\u0261" => "g" + +# ᵷ [LATIN SMALL LETTER TURNED G] +"\u1D77" => "g" + +# ᵹ [LATIN SMALL LETTER INSULAR G] +"\u1D79" => "g" + +# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK] +"\u1D83" => "g" + +# ḡ [LATIN SMALL LETTER G WITH MACRON] +"\u1E21" => "g" + +# ⓖ [CIRCLED LATIN SMALL LETTER G] +"\u24D6" => "g" + +# ꝿ [LATIN SMALL LETTER TURNED INSULAR G] +"\uA77F" => "g" + +# g [FULLWIDTH LATIN SMALL LETTER G] +"\uFF47" => "g" + +# ⒢ [PARENTHESIZED LATIN SMALL LETTER G] +"\u24A2" => "(g)" + +# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] +"\u0124" => "H" + +# Ħ [LATIN CAPITAL LETTER H WITH STROKE] +"\u0126" => "H" + +# Ȟ [LATIN CAPITAL LETTER H WITH CARON] +"\u021E" => "H" + +# ʜ [LATIN LETTER SMALL CAPITAL H] +"\u029C" => "H" + +# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE] +"\u1E22" => "H" + +# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW] +"\u1E24" => "H" + +# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS] +"\u1E26" => "H" + +# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA] +"\u1E28" => "H" + +# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW] +"\u1E2A" => "H" + +# Ⓗ [CIRCLED LATIN CAPITAL LETTER H] +"\u24BD" => "H" + +# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER] +"\u2C67" => "H" + +# Ⱶ [LATIN CAPITAL LETTER HALF H] +"\u2C75" => "H" + +# H [FULLWIDTH LATIN CAPITAL LETTER H] +"\uFF28" => "H" + +# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX] +"\u0125" => "h" + +# ħ [LATIN SMALL LETTER H WITH STROKE] +"\u0127" => "h" + +# ȟ [LATIN SMALL LETTER H WITH CARON] +"\u021F" => "h" + +# ɥ [LATIN SMALL LETTER TURNED H] +"\u0265" => "h" + +# ɦ [LATIN SMALL LETTER H WITH HOOK] +"\u0266" => "h" + +# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK] +"\u02AE" => "h" + +# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] +"\u02AF" => "h" + +# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE] +"\u1E23" => "h" + +# ḥ [LATIN SMALL LETTER H WITH DOT BELOW] +"\u1E25" => "h" + +# ḧ [LATIN SMALL LETTER H WITH DIAERESIS] +"\u1E27" => "h" + +# ḩ [LATIN SMALL LETTER H WITH CEDILLA] +"\u1E29" => "h" + +# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW] +"\u1E2B" => "h" + +# ẖ [LATIN SMALL LETTER H WITH LINE BELOW] +"\u1E96" => "h" + +# ⓗ [CIRCLED LATIN SMALL LETTER H] +"\u24D7" => "h" + +# ⱨ [LATIN SMALL LETTER H WITH DESCENDER] +"\u2C68" => "h" + +# ⱶ [LATIN SMALL LETTER HALF H] +"\u2C76" => "h" + +# h [FULLWIDTH LATIN SMALL LETTER H] +"\uFF48" => "h" + +# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR] +"\u01F6" => "HV" + +# ⒣ [PARENTHESIZED LATIN SMALL LETTER H] +"\u24A3" => "(h)" + +# ƕ [LATIN SMALL LETTER HV] +"\u0195" => "hv" + +# Ì [LATIN CAPITAL LETTER I WITH GRAVE] +"\u00CC" => "I" + +# Í [LATIN CAPITAL LETTER I WITH ACUTE] +"\u00CD" => "I" + +# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] +"\u00CE" => "I" + +# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS] +"\u00CF" => "I" + +# Ĩ [LATIN CAPITAL LETTER I WITH TILDE] +"\u0128" => "I" + +# Ī [LATIN CAPITAL LETTER I WITH MACRON] +"\u012A" => "I" + +# Ĭ [LATIN CAPITAL LETTER I WITH BREVE] +"\u012C" => "I" + +# Į [LATIN CAPITAL LETTER I WITH OGONEK] +"\u012E" => "I" + +# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE] +"\u0130" => "I" + +# Ɩ [LATIN CAPITAL LETTER IOTA] +"\u0196" => "I" + +# Ɨ [LATIN CAPITAL LETTER I WITH STROKE] +"\u0197" => "I" + +# Ǐ [LATIN CAPITAL LETTER I WITH CARON] +"\u01CF" => "I" + +# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] +"\u0208" => "I" + +# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE] +"\u020A" => "I" + +# ɪ [LATIN LETTER SMALL CAPITAL I] +"\u026A" => "I" + +# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE] +"\u1D7B" => "I" + +# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW] +"\u1E2C" => "I" + +# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] +"\u1E2E" => "I" + +# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE] +"\u1EC8" => "I" + +# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW] +"\u1ECA" => "I" + +# Ⓘ [CIRCLED LATIN CAPITAL LETTER I] +"\u24BE" => "I" + +# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA] +"\uA7FE" => "I" + +# I [FULLWIDTH LATIN CAPITAL LETTER I] +"\uFF29" => "I" + +# ì [LATIN SMALL LETTER I WITH GRAVE] +"\u00EC" => "i" + +# í [LATIN SMALL LETTER I WITH ACUTE] +"\u00ED" => "i" + +# î [LATIN SMALL LETTER I WITH CIRCUMFLEX] +"\u00EE" => "i" + +# ï [LATIN SMALL LETTER I WITH DIAERESIS] +"\u00EF" => "i" + +# ĩ [LATIN SMALL LETTER I WITH TILDE] +"\u0129" => "i" + +# ī [LATIN SMALL LETTER I WITH MACRON] +"\u012B" => "i" + +# ĭ [LATIN SMALL LETTER I WITH BREVE] +"\u012D" => "i" + +# į [LATIN SMALL LETTER I WITH OGONEK] +"\u012F" => "i" + +# ı [LATIN SMALL LETTER DOTLESS I] +"\u0131" => "i" + +# ǐ [LATIN SMALL LETTER I WITH CARON] +"\u01D0" => "i" + +# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE] +"\u0209" => "i" + +# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE] +"\u020B" => "i" + +# ɨ [LATIN SMALL LETTER I WITH STROKE] +"\u0268" => "i" + +# ᴉ [LATIN SMALL LETTER TURNED I] +"\u1D09" => "i" + +# ᵢ [LATIN SUBSCRIPT SMALL LETTER I] +"\u1D62" => "i" + +# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE] +"\u1D7C" => "i" + +# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK] +"\u1D96" => "i" + +# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW] +"\u1E2D" => "i" + +# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] +"\u1E2F" => "i" + +# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE] +"\u1EC9" => "i" + +# ị [LATIN SMALL LETTER I WITH DOT BELOW] +"\u1ECB" => "i" + +# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I] +"\u2071" => "i" + +# ⓘ [CIRCLED LATIN SMALL LETTER I] +"\u24D8" => "i" + +# i [FULLWIDTH LATIN SMALL LETTER I] +"\uFF49" => "i" + +# IJ [LATIN CAPITAL LIGATURE IJ] +"\u0132" => "IJ" + +# ⒤ [PARENTHESIZED LATIN SMALL LETTER I] +"\u24A4" => "(i)" + +# ij [LATIN SMALL LIGATURE IJ] +"\u0133" => "ij" + +# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] +"\u0134" => "J" + +# Ɉ [LATIN CAPITAL LETTER J WITH STROKE] +"\u0248" => "J" + +# ᴊ [LATIN LETTER SMALL CAPITAL J] +"\u1D0A" => "J" + +# Ⓙ [CIRCLED LATIN CAPITAL LETTER J] +"\u24BF" => "J" + +# J [FULLWIDTH LATIN CAPITAL LETTER J] +"\uFF2A" => "J" + +# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX] +"\u0135" => "j" + +# ǰ [LATIN SMALL LETTER J WITH CARON] +"\u01F0" => "j" + +# ȷ [LATIN SMALL LETTER DOTLESS J] +"\u0237" => "j" + +# ɉ [LATIN SMALL LETTER J WITH STROKE] +"\u0249" => "j" + +# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE] +"\u025F" => "j" + +# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] +"\u0284" => "j" + +# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL] +"\u029D" => "j" + +# ⓙ [CIRCLED LATIN SMALL LETTER J] +"\u24D9" => "j" + +# ⱼ [LATIN SUBSCRIPT SMALL LETTER J] +"\u2C7C" => "j" + +# j [FULLWIDTH LATIN SMALL LETTER J] +"\uFF4A" => "j" + +# ⒥ [PARENTHESIZED LATIN SMALL LETTER J] +"\u24A5" => "(j)" + +# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA] +"\u0136" => "K" + +# Ƙ [LATIN CAPITAL LETTER K WITH HOOK] +"\u0198" => "K" + +# Ǩ [LATIN CAPITAL LETTER K WITH CARON] +"\u01E8" => "K" + +# ᴋ [LATIN LETTER SMALL CAPITAL K] +"\u1D0B" => "K" + +# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE] +"\u1E30" => "K" + +# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW] +"\u1E32" => "K" + +# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW] +"\u1E34" => "K" + +# Ⓚ [CIRCLED LATIN CAPITAL LETTER K] +"\u24C0" => "K" + +# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER] +"\u2C69" => "K" + +# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE] +"\uA740" => "K" + +# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] +"\uA742" => "K" + +# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] +"\uA744" => "K" + +# K [FULLWIDTH LATIN CAPITAL LETTER K] +"\uFF2B" => "K" + +# ķ [LATIN SMALL LETTER K WITH CEDILLA] +"\u0137" => "k" + +# ƙ [LATIN SMALL LETTER K WITH HOOK] +"\u0199" => "k" + +# ǩ [LATIN SMALL LETTER K WITH CARON] +"\u01E9" => "k" + +# ʞ [LATIN SMALL LETTER TURNED K] +"\u029E" => "k" + +# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK] +"\u1D84" => "k" + +# ḱ [LATIN SMALL LETTER K WITH ACUTE] +"\u1E31" => "k" + +# ḳ [LATIN SMALL LETTER K WITH DOT BELOW] +"\u1E33" => "k" + +# ḵ [LATIN SMALL LETTER K WITH LINE BELOW] +"\u1E35" => "k" + +# ⓚ [CIRCLED LATIN SMALL LETTER K] +"\u24DA" => "k" + +# ⱪ [LATIN SMALL LETTER K WITH DESCENDER] +"\u2C6A" => "k" + +# ꝁ [LATIN SMALL LETTER K WITH STROKE] +"\uA741" => "k" + +# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE] +"\uA743" => "k" + +# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] +"\uA745" => "k" + +# k [FULLWIDTH LATIN SMALL LETTER K] +"\uFF4B" => "k" + +# ⒦ [PARENTHESIZED LATIN SMALL LETTER K] +"\u24A6" => "(k)" + +# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE] +"\u0139" => "L" + +# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA] +"\u013B" => "L" + +# Ľ [LATIN CAPITAL LETTER L WITH CARON] +"\u013D" => "L" + +# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT] +"\u013F" => "L" + +# Ł [LATIN CAPITAL LETTER L WITH STROKE] +"\u0141" => "L" + +# Ƚ [LATIN CAPITAL LETTER L WITH BAR] +"\u023D" => "L" + +# ʟ [LATIN LETTER SMALL CAPITAL L] +"\u029F" => "L" + +# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE] +"\u1D0C" => "L" + +# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW] +"\u1E36" => "L" + +# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] +"\u1E38" => "L" + +# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW] +"\u1E3A" => "L" + +# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] +"\u1E3C" => "L" + +# Ⓛ [CIRCLED LATIN CAPITAL LETTER L] +"\u24C1" => "L" + +# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR] +"\u2C60" => "L" + +# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] +"\u2C62" => "L" + +# Ꝇ [LATIN CAPITAL LETTER BROKEN L] +"\uA746" => "L" + +# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE] +"\uA748" => "L" + +# Ꞁ [LATIN CAPITAL LETTER TURNED L] +"\uA780" => "L" + +# L [FULLWIDTH LATIN CAPITAL LETTER L] +"\uFF2C" => "L" + +# ĺ [LATIN SMALL LETTER L WITH ACUTE] +"\u013A" => "l" + +# ļ [LATIN SMALL LETTER L WITH CEDILLA] +"\u013C" => "l" + +# ľ [LATIN SMALL LETTER L WITH CARON] +"\u013E" => "l" + +# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT] +"\u0140" => "l" + +# ł [LATIN SMALL LETTER L WITH STROKE] +"\u0142" => "l" + +# ƚ [LATIN SMALL LETTER L WITH BAR] +"\u019A" => "l" + +# ȴ [LATIN SMALL LETTER L WITH CURL] +"\u0234" => "l" + +# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE] +"\u026B" => "l" + +# ɬ [LATIN SMALL LETTER L WITH BELT] +"\u026C" => "l" + +# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK] +"\u026D" => "l" + +# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK] +"\u1D85" => "l" + +# ḷ [LATIN SMALL LETTER L WITH DOT BELOW] +"\u1E37" => "l" + +# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] +"\u1E39" => "l" + +# ḻ [LATIN SMALL LETTER L WITH LINE BELOW] +"\u1E3B" => "l" + +# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] +"\u1E3D" => "l" + +# ⓛ [CIRCLED LATIN SMALL LETTER L] +"\u24DB" => "l" + +# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR] +"\u2C61" => "l" + +# ꝇ [LATIN SMALL LETTER BROKEN L] +"\uA747" => "l" + +# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE] +"\uA749" => "l" + +# ꞁ [LATIN SMALL LETTER TURNED L] +"\uA781" => "l" + +# l [FULLWIDTH LATIN SMALL LETTER L] +"\uFF4C" => "l" + +# LJ [LATIN CAPITAL LETTER LJ] +"\u01C7" => "LJ" + +# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL] +"\u1EFA" => "LL" + +# Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J] +"\u01C8" => "Lj" + +# ⒧ [PARENTHESIZED LATIN SMALL LETTER L] +"\u24A7" => "(l)" + +# lj [LATIN SMALL LETTER LJ] +"\u01C9" => "lj" + +# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL] +"\u1EFB" => "ll" + +# ʪ [LATIN SMALL LETTER LS DIGRAPH] +"\u02AA" => "ls" + +# ʫ [LATIN SMALL LETTER LZ DIGRAPH] +"\u02AB" => "lz" + +# Ɯ [LATIN CAPITAL LETTER TURNED M] +"\u019C" => "M" + +# ᴍ [LATIN LETTER SMALL CAPITAL M] +"\u1D0D" => "M" + +# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE] +"\u1E3E" => "M" + +# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE] +"\u1E40" => "M" + +# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW] +"\u1E42" => "M" + +# Ⓜ [CIRCLED LATIN CAPITAL LETTER M] +"\u24C2" => "M" + +# Ɱ [LATIN CAPITAL LETTER M WITH HOOK] +"\u2C6E" => "M" + +# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M] +"\uA7FD" => "M" + +# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M] +"\uA7FF" => "M" + +# M [FULLWIDTH LATIN CAPITAL LETTER M] +"\uFF2D" => "M" + +# ɯ [LATIN SMALL LETTER TURNED M] +"\u026F" => "m" + +# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG] +"\u0270" => "m" + +# ɱ [LATIN SMALL LETTER M WITH HOOK] +"\u0271" => "m" + +# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE] +"\u1D6F" => "m" + +# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK] +"\u1D86" => "m" + +# ḿ [LATIN SMALL LETTER M WITH ACUTE] +"\u1E3F" => "m" + +# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE] +"\u1E41" => "m" + +# ṃ [LATIN SMALL LETTER M WITH DOT BELOW] +"\u1E43" => "m" + +# ⓜ [CIRCLED LATIN SMALL LETTER M] +"\u24DC" => "m" + +# m [FULLWIDTH LATIN SMALL LETTER M] +"\uFF4D" => "m" + +# ⒨ [PARENTHESIZED LATIN SMALL LETTER M] +"\u24A8" => "(m)" + +# Ñ [LATIN CAPITAL LETTER N WITH TILDE] +"\u00D1" => "N" + +# Ń [LATIN CAPITAL LETTER N WITH ACUTE] +"\u0143" => "N" + +# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA] +"\u0145" => "N" + +# Ň [LATIN CAPITAL LETTER N WITH CARON] +"\u0147" => "N" + +# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG] +"\u014A" => "N" + +# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK] +"\u019D" => "N" + +# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE] +"\u01F8" => "N" + +# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] +"\u0220" => "N" + +# ɴ [LATIN LETTER SMALL CAPITAL N] +"\u0274" => "N" + +# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N] +"\u1D0E" => "N" + +# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE] +"\u1E44" => "N" + +# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW] +"\u1E46" => "N" + +# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW] +"\u1E48" => "N" + +# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] +"\u1E4A" => "N" + +# Ⓝ [CIRCLED LATIN CAPITAL LETTER N] +"\u24C3" => "N" + +# N [FULLWIDTH LATIN CAPITAL LETTER N] +"\uFF2E" => "N" + +# ñ [LATIN SMALL LETTER N WITH TILDE] +"\u00F1" => "n" + +# ń [LATIN SMALL LETTER N WITH ACUTE] +"\u0144" => "n" + +# ņ [LATIN SMALL LETTER N WITH CEDILLA] +"\u0146" => "n" + +# ň [LATIN SMALL LETTER N WITH CARON] +"\u0148" => "n" + +# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] +"\u0149" => "n" + +# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG] +"\u014B" => "n" + +# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG] +"\u019E" => "n" + +# ǹ [LATIN SMALL LETTER N WITH GRAVE] +"\u01F9" => "n" + +# ȵ [LATIN SMALL LETTER N WITH CURL] +"\u0235" => "n" + +# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK] +"\u0272" => "n" + +# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK] +"\u0273" => "n" + +# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE] +"\u1D70" => "n" + +# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK] +"\u1D87" => "n" + +# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE] +"\u1E45" => "n" + +# ṇ [LATIN SMALL LETTER N WITH DOT BELOW] +"\u1E47" => "n" + +# ṉ [LATIN SMALL LETTER N WITH LINE BELOW] +"\u1E49" => "n" + +# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] +"\u1E4B" => "n" + +# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N] +"\u207F" => "n" + +# ⓝ [CIRCLED LATIN SMALL LETTER N] +"\u24DD" => "n" + +# n [FULLWIDTH LATIN SMALL LETTER N] +"\uFF4E" => "n" + +# NJ [LATIN CAPITAL LETTER NJ] +"\u01CA" => "NJ" + +# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J] +"\u01CB" => "Nj" + +# ⒩ [PARENTHESIZED LATIN SMALL LETTER N] +"\u24A9" => "(n)" + +# nj [LATIN SMALL LETTER NJ] +"\u01CC" => "nj" + +# Ò [LATIN CAPITAL LETTER O WITH GRAVE] +"\u00D2" => "O" + +# Ó [LATIN CAPITAL LETTER O WITH ACUTE] +"\u00D3" => "O" + +# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] +"\u00D4" => "O" + +# Õ [LATIN CAPITAL LETTER O WITH TILDE] +"\u00D5" => "O" + +# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS] +"\u00D6" => "O" + +# Ø [LATIN CAPITAL LETTER O WITH STROKE] +"\u00D8" => "O" + +# Ō [LATIN CAPITAL LETTER O WITH MACRON] +"\u014C" => "O" + +# Ŏ [LATIN CAPITAL LETTER O WITH BREVE] +"\u014E" => "O" + +# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] +"\u0150" => "O" + +# Ɔ [LATIN CAPITAL LETTER OPEN O] +"\u0186" => "O" + +# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] +"\u019F" => "O" + +# Ơ [LATIN CAPITAL LETTER O WITH HORN] +"\u01A0" => "O" + +# Ǒ [LATIN CAPITAL LETTER O WITH CARON] +"\u01D1" => "O" + +# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK] +"\u01EA" => "O" + +# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] +"\u01EC" => "O" + +# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] +"\u01FE" => "O" + +# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] +"\u020C" => "O" + +# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE] +"\u020E" => "O" + +# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] +"\u022A" => "O" + +# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] +"\u022C" => "O" + +# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE] +"\u022E" => "O" + +# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] +"\u0230" => "O" + +# ᴏ [LATIN LETTER SMALL CAPITAL O] +"\u1D0F" => "O" + +# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O] +"\u1D10" => "O" + +# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] +"\u1E4C" => "O" + +# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] +"\u1E4E" => "O" + +# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] +"\u1E50" => "O" + +# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] +"\u1E52" => "O" + +# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW] +"\u1ECC" => "O" + +# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE] +"\u1ECE" => "O" + +# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] +"\u1ED0" => "O" + +# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] +"\u1ED2" => "O" + +# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1ED4" => "O" + +# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] +"\u1ED6" => "O" + +# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] +"\u1ED8" => "O" + +# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] +"\u1EDA" => "O" + +# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] +"\u1EDC" => "O" + +# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] +"\u1EDE" => "O" + +# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE] +"\u1EE0" => "O" + +# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] +"\u1EE2" => "O" + +# Ⓞ [CIRCLED LATIN CAPITAL LETTER O] +"\u24C4" => "O" + +# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] +"\uA74A" => "O" + +# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP] +"\uA74C" => "O" + +# O [FULLWIDTH LATIN CAPITAL LETTER O] +"\uFF2F" => "O" + +# ò [LATIN SMALL LETTER O WITH GRAVE] +"\u00F2" => "o" + +# ó [LATIN SMALL LETTER O WITH ACUTE] +"\u00F3" => "o" + +# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX] +"\u00F4" => "o" + +# õ [LATIN SMALL LETTER O WITH TILDE] +"\u00F5" => "o" + +# ö [LATIN SMALL LETTER O WITH DIAERESIS] +"\u00F6" => "o" + +# ø [LATIN SMALL LETTER O WITH STROKE] +"\u00F8" => "o" + +# ō [LATIN SMALL LETTER O WITH MACRON] +"\u014D" => "o" + +# ŏ [LATIN SMALL LETTER O WITH BREVE] +"\u014F" => "o" + +# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE] +"\u0151" => "o" + +# ơ [LATIN SMALL LETTER O WITH HORN] +"\u01A1" => "o" + +# ǒ [LATIN SMALL LETTER O WITH CARON] +"\u01D2" => "o" + +# ǫ [LATIN SMALL LETTER O WITH OGONEK] +"\u01EB" => "o" + +# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON] +"\u01ED" => "o" + +# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE] +"\u01FF" => "o" + +# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE] +"\u020D" => "o" + +# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE] +"\u020F" => "o" + +# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] +"\u022B" => "o" + +# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON] +"\u022D" => "o" + +# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE] +"\u022F" => "o" + +# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] +"\u0231" => "o" + +# ɔ [LATIN SMALL LETTER OPEN O] +"\u0254" => "o" + +# ɵ [LATIN SMALL LETTER BARRED O] +"\u0275" => "o" + +# ᴖ [LATIN SMALL LETTER TOP HALF O] +"\u1D16" => "o" + +# ᴗ [LATIN SMALL LETTER BOTTOM HALF O] +"\u1D17" => "o" + +# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] +"\u1D97" => "o" + +# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE] +"\u1E4D" => "o" + +# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] +"\u1E4F" => "o" + +# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE] +"\u1E51" => "o" + +# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE] +"\u1E53" => "o" + +# ọ [LATIN SMALL LETTER O WITH DOT BELOW] +"\u1ECD" => "o" + +# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE] +"\u1ECF" => "o" + +# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] +"\u1ED1" => "o" + +# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] +"\u1ED3" => "o" + +# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] +"\u1ED5" => "o" + +# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] +"\u1ED7" => "o" + +# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] +"\u1ED9" => "o" + +# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE] +"\u1EDB" => "o" + +# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE] +"\u1EDD" => "o" + +# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] +"\u1EDF" => "o" + +# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE] +"\u1EE1" => "o" + +# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] +"\u1EE3" => "o" + +# ₒ [LATIN SUBSCRIPT SMALL LETTER O] +"\u2092" => "o" + +# ⓞ [CIRCLED LATIN SMALL LETTER O] +"\u24DE" => "o" + +# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE] +"\u2C7A" => "o" + +# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] +"\uA74B" => "o" + +# ꝍ [LATIN SMALL LETTER O WITH LOOP] +"\uA74D" => "o" + +# o [FULLWIDTH LATIN SMALL LETTER O] +"\uFF4F" => "o" + +# Œ [LATIN CAPITAL LIGATURE OE] +"\u0152" => "OE" + +# ɶ [LATIN LETTER SMALL CAPITAL OE] +"\u0276" => "OE" + +# Ꝏ [LATIN CAPITAL LETTER OO] +"\uA74E" => "OO" + +# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU] +"\u0222" => "OU" + +# ᴕ [LATIN LETTER SMALL CAPITAL OU] +"\u1D15" => "OU" + +# ⒪ [PARENTHESIZED LATIN SMALL LETTER O] +"\u24AA" => "(o)" + +# œ [LATIN SMALL LIGATURE OE] +"\u0153" => "oe" + +# ᴔ [LATIN SMALL LETTER TURNED OE] +"\u1D14" => "oe" + +# ꝏ [LATIN SMALL LETTER OO] +"\uA74F" => "oo" + +# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU] +"\u0223" => "ou" + +# Ƥ [LATIN CAPITAL LETTER P WITH HOOK] +"\u01A4" => "P" + +# ᴘ [LATIN LETTER SMALL CAPITAL P] +"\u1D18" => "P" + +# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE] +"\u1E54" => "P" + +# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE] +"\u1E56" => "P" + +# Ⓟ [CIRCLED LATIN CAPITAL LETTER P] +"\u24C5" => "P" + +# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE] +"\u2C63" => "P" + +# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] +"\uA750" => "P" + +# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH] +"\uA752" => "P" + +# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] +"\uA754" => "P" + +# P [FULLWIDTH LATIN CAPITAL LETTER P] +"\uFF30" => "P" + +# ƥ [LATIN SMALL LETTER P WITH HOOK] +"\u01A5" => "p" + +# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE] +"\u1D71" => "p" + +# ᵽ [LATIN SMALL LETTER P WITH STROKE] +"\u1D7D" => "p" + +# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK] +"\u1D88" => "p" + +# ṕ [LATIN SMALL LETTER P WITH ACUTE] +"\u1E55" => "p" + +# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE] +"\u1E57" => "p" + +# ⓟ [CIRCLED LATIN SMALL LETTER P] +"\u24DF" => "p" + +# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] +"\uA751" => "p" + +# ꝓ [LATIN SMALL LETTER P WITH FLOURISH] +"\uA753" => "p" + +# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL] +"\uA755" => "p" + +# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P] +"\uA7FC" => "p" + +# p [FULLWIDTH LATIN SMALL LETTER P] +"\uFF50" => "p" + +# ⒫ [PARENTHESIZED LATIN SMALL LETTER P] +"\u24AB" => "(p)" + +# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] +"\u024A" => "Q" + +# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q] +"\u24C6" => "Q" + +# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] +"\uA756" => "Q" + +# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] +"\uA758" => "Q" + +# Q [FULLWIDTH LATIN CAPITAL LETTER Q] +"\uFF31" => "Q" + +# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA] +"\u0138" => "q" + +# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL] +"\u024B" => "q" + +# ʠ [LATIN SMALL LETTER Q WITH HOOK] +"\u02A0" => "q" + +# ⓠ [CIRCLED LATIN SMALL LETTER Q] +"\u24E0" => "q" + +# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] +"\uA757" => "q" + +# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] +"\uA759" => "q" + +# q [FULLWIDTH LATIN SMALL LETTER Q] +"\uFF51" => "q" + +# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q] +"\u24AC" => "(q)" + +# ȹ [LATIN SMALL LETTER QP DIGRAPH] +"\u0239" => "qp" + +# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE] +"\u0154" => "R" + +# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA] +"\u0156" => "R" + +# Ř [LATIN CAPITAL LETTER R WITH CARON] +"\u0158" => "R" + +# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] +"\u0210" => "R" + +# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE] +"\u0212" => "R" + +# Ɍ [LATIN CAPITAL LETTER R WITH STROKE] +"\u024C" => "R" + +# ʀ [LATIN LETTER SMALL CAPITAL R] +"\u0280" => "R" + +# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R] +"\u0281" => "R" + +# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R] +"\u1D19" => "R" + +# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R] +"\u1D1A" => "R" + +# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE] +"\u1E58" => "R" + +# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW] +"\u1E5A" => "R" + +# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] +"\u1E5C" => "R" + +# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW] +"\u1E5E" => "R" + +# Ⓡ [CIRCLED LATIN CAPITAL LETTER R] +"\u24C7" => "R" + +# Ɽ [LATIN CAPITAL LETTER R WITH TAIL] +"\u2C64" => "R" + +# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA] +"\uA75A" => "R" + +# Ꞃ [LATIN CAPITAL LETTER INSULAR R] +"\uA782" => "R" + +# R [FULLWIDTH LATIN CAPITAL LETTER R] +"\uFF32" => "R" + +# ŕ [LATIN SMALL LETTER R WITH ACUTE] +"\u0155" => "r" + +# ŗ [LATIN SMALL LETTER R WITH CEDILLA] +"\u0157" => "r" + +# ř [LATIN SMALL LETTER R WITH CARON] +"\u0159" => "r" + +# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE] +"\u0211" => "r" + +# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE] +"\u0213" => "r" + +# ɍ [LATIN SMALL LETTER R WITH STROKE] +"\u024D" => "r" + +# ɼ [LATIN SMALL LETTER R WITH LONG LEG] +"\u027C" => "r" + +# ɽ [LATIN SMALL LETTER R WITH TAIL] +"\u027D" => "r" + +# ɾ [LATIN SMALL LETTER R WITH FISHHOOK] +"\u027E" => "r" + +# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] +"\u027F" => "r" + +# ᵣ [LATIN SUBSCRIPT SMALL LETTER R] +"\u1D63" => "r" + +# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE] +"\u1D72" => "r" + +# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] +"\u1D73" => "r" + +# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK] +"\u1D89" => "r" + +# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE] +"\u1E59" => "r" + +# ṛ [LATIN SMALL LETTER R WITH DOT BELOW] +"\u1E5B" => "r" + +# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] +"\u1E5D" => "r" + +# ṟ [LATIN SMALL LETTER R WITH LINE BELOW] +"\u1E5F" => "r" + +# ⓡ [CIRCLED LATIN SMALL LETTER R] +"\u24E1" => "r" + +# ꝛ [LATIN SMALL LETTER R ROTUNDA] +"\uA75B" => "r" + +# ꞃ [LATIN SMALL LETTER INSULAR R] +"\uA783" => "r" + +# r [FULLWIDTH LATIN SMALL LETTER R] +"\uFF52" => "r" + +# ⒭ [PARENTHESIZED LATIN SMALL LETTER R] +"\u24AD" => "(r)" + +# Ś [LATIN CAPITAL LETTER S WITH ACUTE] +"\u015A" => "S" + +# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] +"\u015C" => "S" + +# Ş [LATIN CAPITAL LETTER S WITH CEDILLA] +"\u015E" => "S" + +# Š [LATIN CAPITAL LETTER S WITH CARON] +"\u0160" => "S" + +# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW] +"\u0218" => "S" + +# Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE] +"\u1E60" => "S" + +# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW] +"\u1E62" => "S" + +# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] +"\u1E64" => "S" + +# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] +"\u1E66" => "S" + +# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] +"\u1E68" => "S" + +# Ⓢ [CIRCLED LATIN CAPITAL LETTER S] +"\u24C8" => "S" + +# ꜱ [LATIN LETTER SMALL CAPITAL S] +"\uA731" => "S" + +# ꞅ [LATIN SMALL LETTER INSULAR S] +"\uA785" => "S" + +# S [FULLWIDTH LATIN CAPITAL LETTER S] +"\uFF33" => "S" + +# ś [LATIN SMALL LETTER S WITH ACUTE] +"\u015B" => "s" + +# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX] +"\u015D" => "s" + +# ş [LATIN SMALL LETTER S WITH CEDILLA] +"\u015F" => "s" + +# š [LATIN SMALL LETTER S WITH CARON] +"\u0161" => "s" + +# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S] +"\u017F" => "s" + +# ș [LATIN SMALL LETTER S WITH COMMA BELOW] +"\u0219" => "s" + +# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL] +"\u023F" => "s" + +# ʂ [LATIN SMALL LETTER S WITH HOOK] +"\u0282" => "s" + +# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE] +"\u1D74" => "s" + +# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK] +"\u1D8A" => "s" + +# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE] +"\u1E61" => "s" + +# ṣ [LATIN SMALL LETTER S WITH DOT BELOW] +"\u1E63" => "s" + +# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] +"\u1E65" => "s" + +# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] +"\u1E67" => "s" + +# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] +"\u1E69" => "s" + +# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] +"\u1E9C" => "s" + +# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE] +"\u1E9D" => "s" + +# ⓢ [CIRCLED LATIN SMALL LETTER S] +"\u24E2" => "s" + +# Ꞅ [LATIN CAPITAL LETTER INSULAR S] +"\uA784" => "s" + +# s [FULLWIDTH LATIN SMALL LETTER S] +"\uFF53" => "s" + +# ẞ [LATIN CAPITAL LETTER SHARP S] +"\u1E9E" => "SS" + +# ⒮ [PARENTHESIZED LATIN SMALL LETTER S] +"\u24AE" => "(s)" + +# ß [LATIN SMALL LETTER SHARP S] +"\u00DF" => "ss" + +# st [LATIN SMALL LIGATURE ST] +"\uFB06" => "st" + +# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA] +"\u0162" => "T" + +# Ť [LATIN CAPITAL LETTER T WITH CARON] +"\u0164" => "T" + +# Ŧ [LATIN CAPITAL LETTER T WITH STROKE] +"\u0166" => "T" + +# Ƭ [LATIN CAPITAL LETTER T WITH HOOK] +"\u01AC" => "T" + +# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] +"\u01AE" => "T" + +# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW] +"\u021A" => "T" + +# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] +"\u023E" => "T" + +# ᴛ [LATIN LETTER SMALL CAPITAL T] +"\u1D1B" => "T" + +# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE] +"\u1E6A" => "T" + +# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW] +"\u1E6C" => "T" + +# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW] +"\u1E6E" => "T" + +# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] +"\u1E70" => "T" + +# Ⓣ [CIRCLED LATIN CAPITAL LETTER T] +"\u24C9" => "T" + +# Ꞇ [LATIN CAPITAL LETTER INSULAR T] +"\uA786" => "T" + +# T [FULLWIDTH LATIN CAPITAL LETTER T] +"\uFF34" => "T" + +# ţ [LATIN SMALL LETTER T WITH CEDILLA] +"\u0163" => "t" + +# ť [LATIN SMALL LETTER T WITH CARON] +"\u0165" => "t" + +# ŧ [LATIN SMALL LETTER T WITH STROKE] +"\u0167" => "t" + +# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK] +"\u01AB" => "t" + +# ƭ [LATIN SMALL LETTER T WITH HOOK] +"\u01AD" => "t" + +# ț [LATIN SMALL LETTER T WITH COMMA BELOW] +"\u021B" => "t" + +# ȶ [LATIN SMALL LETTER T WITH CURL] +"\u0236" => "t" + +# ʇ [LATIN SMALL LETTER TURNED T] +"\u0287" => "t" + +# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK] +"\u0288" => "t" + +# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE] +"\u1D75" => "t" + +# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE] +"\u1E6B" => "t" + +# ṭ [LATIN SMALL LETTER T WITH DOT BELOW] +"\u1E6D" => "t" + +# ṯ [LATIN SMALL LETTER T WITH LINE BELOW] +"\u1E6F" => "t" + +# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] +"\u1E71" => "t" + +# ẗ [LATIN SMALL LETTER T WITH DIAERESIS] +"\u1E97" => "t" + +# ⓣ [CIRCLED LATIN SMALL LETTER T] +"\u24E3" => "t" + +# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE] +"\u2C66" => "t" + +# t [FULLWIDTH LATIN SMALL LETTER T] +"\uFF54" => "t" + +# Þ [LATIN CAPITAL LETTER THORN] +"\u00DE" => "TH" + +# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] +"\uA766" => "TH" + +# Ꜩ [LATIN CAPITAL LETTER TZ] +"\uA728" => "TZ" + +# ⒯ [PARENTHESIZED LATIN SMALL LETTER T] +"\u24AF" => "(t)" + +# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL] +"\u02A8" => "tc" + +# þ [LATIN SMALL LETTER THORN] +"\u00FE" => "th" + +# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH] +"\u1D7A" => "th" + +# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] +"\uA767" => "th" + +# ʦ [LATIN SMALL LETTER TS DIGRAPH] +"\u02A6" => "ts" + +# ꜩ [LATIN SMALL LETTER TZ] +"\uA729" => "tz" + +# Ù [LATIN CAPITAL LETTER U WITH GRAVE] +"\u00D9" => "U" + +# Ú [LATIN CAPITAL LETTER U WITH ACUTE] +"\u00DA" => "U" + +# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] +"\u00DB" => "U" + +# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS] +"\u00DC" => "U" + +# Ũ [LATIN CAPITAL LETTER U WITH TILDE] +"\u0168" => "U" + +# Ū [LATIN CAPITAL LETTER U WITH MACRON] +"\u016A" => "U" + +# Ŭ [LATIN CAPITAL LETTER U WITH BREVE] +"\u016C" => "U" + +# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE] +"\u016E" => "U" + +# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] +"\u0170" => "U" + +# Ų [LATIN CAPITAL LETTER U WITH OGONEK] +"\u0172" => "U" + +# Ư [LATIN CAPITAL LETTER U WITH HORN] +"\u01AF" => "U" + +# Ǔ [LATIN CAPITAL LETTER U WITH CARON] +"\u01D3" => "U" + +# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] +"\u01D5" => "U" + +# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] +"\u01D7" => "U" + +# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] +"\u01D9" => "U" + +# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] +"\u01DB" => "U" + +# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] +"\u0214" => "U" + +# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE] +"\u0216" => "U" + +# Ʉ [LATIN CAPITAL LETTER U BAR] +"\u0244" => "U" + +# ᴜ [LATIN LETTER SMALL CAPITAL U] +"\u1D1C" => "U" + +# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE] +"\u1D7E" => "U" + +# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] +"\u1E72" => "U" + +# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW] +"\u1E74" => "U" + +# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] +"\u1E76" => "U" + +# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] +"\u1E78" => "U" + +# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] +"\u1E7A" => "U" + +# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW] +"\u1EE4" => "U" + +# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE] +"\u1EE6" => "U" + +# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] +"\u1EE8" => "U" + +# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] +"\u1EEA" => "U" + +# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] +"\u1EEC" => "U" + +# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE] +"\u1EEE" => "U" + +# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] +"\u1EF0" => "U" + +# Ⓤ [CIRCLED LATIN CAPITAL LETTER U] +"\u24CA" => "U" + +# U [FULLWIDTH LATIN CAPITAL LETTER U] +"\uFF35" => "U" + +# ù [LATIN SMALL LETTER U WITH GRAVE] +"\u00F9" => "u" + +# ú [LATIN SMALL LETTER U WITH ACUTE] +"\u00FA" => "u" + +# û [LATIN SMALL LETTER U WITH CIRCUMFLEX] +"\u00FB" => "u" + +# ü [LATIN SMALL LETTER U WITH DIAERESIS] +"\u00FC" => "u" + +# ũ [LATIN SMALL LETTER U WITH TILDE] +"\u0169" => "u" + +# ū [LATIN SMALL LETTER U WITH MACRON] +"\u016B" => "u" + +# ŭ [LATIN SMALL LETTER U WITH BREVE] +"\u016D" => "u" + +# ů [LATIN SMALL LETTER U WITH RING ABOVE] +"\u016F" => "u" + +# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE] +"\u0171" => "u" + +# ų [LATIN SMALL LETTER U WITH OGONEK] +"\u0173" => "u" + +# ư [LATIN SMALL LETTER U WITH HORN] +"\u01B0" => "u" + +# ǔ [LATIN SMALL LETTER U WITH CARON] +"\u01D4" => "u" + +# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] +"\u01D6" => "u" + +# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] +"\u01D8" => "u" + +# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] +"\u01DA" => "u" + +# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] +"\u01DC" => "u" + +# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE] +"\u0215" => "u" + +# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE] +"\u0217" => "u" + +# ʉ [LATIN SMALL LETTER U BAR] +"\u0289" => "u" + +# ᵤ [LATIN SUBSCRIPT SMALL LETTER U] +"\u1D64" => "u" + +# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK] +"\u1D99" => "u" + +# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW] +"\u1E73" => "u" + +# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW] +"\u1E75" => "u" + +# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] +"\u1E77" => "u" + +# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE] +"\u1E79" => "u" + +# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] +"\u1E7B" => "u" + +# ụ [LATIN SMALL LETTER U WITH DOT BELOW] +"\u1EE5" => "u" + +# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE] +"\u1EE7" => "u" + +# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE] +"\u1EE9" => "u" + +# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE] +"\u1EEB" => "u" + +# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] +"\u1EED" => "u" + +# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE] +"\u1EEF" => "u" + +# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] +"\u1EF1" => "u" + +# ⓤ [CIRCLED LATIN SMALL LETTER U] +"\u24E4" => "u" + +# u [FULLWIDTH LATIN SMALL LETTER U] +"\uFF55" => "u" + +# ⒰ [PARENTHESIZED LATIN SMALL LETTER U] +"\u24B0" => "(u)" + +# ᵫ [LATIN SMALL LETTER UE] +"\u1D6B" => "ue" + +# Ʋ [LATIN CAPITAL LETTER V WITH HOOK] +"\u01B2" => "V" + +# Ʌ [LATIN CAPITAL LETTER TURNED V] +"\u0245" => "V" + +# ᴠ [LATIN LETTER SMALL CAPITAL V] +"\u1D20" => "V" + +# Ṽ [LATIN CAPITAL LETTER V WITH TILDE] +"\u1E7C" => "V" + +# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW] +"\u1E7E" => "V" + +# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V] +"\u1EFC" => "V" + +# Ⓥ [CIRCLED LATIN CAPITAL LETTER V] +"\u24CB" => "V" + +# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] +"\uA75E" => "V" + +# Ꝩ [LATIN CAPITAL LETTER VEND] +"\uA768" => "V" + +# V [FULLWIDTH LATIN CAPITAL LETTER V] +"\uFF36" => "V" + +# ʋ [LATIN SMALL LETTER V WITH HOOK] +"\u028B" => "v" + +# ʌ [LATIN SMALL LETTER TURNED V] +"\u028C" => "v" + +# ᵥ [LATIN SUBSCRIPT SMALL LETTER V] +"\u1D65" => "v" + +# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK] +"\u1D8C" => "v" + +# ṽ [LATIN SMALL LETTER V WITH TILDE] +"\u1E7D" => "v" + +# ṿ [LATIN SMALL LETTER V WITH DOT BELOW] +"\u1E7F" => "v" + +# ⓥ [CIRCLED LATIN SMALL LETTER V] +"\u24E5" => "v" + +# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK] +"\u2C71" => "v" + +# ⱴ [LATIN SMALL LETTER V WITH CURL] +"\u2C74" => "v" + +# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE] +"\uA75F" => "v" + +# v [FULLWIDTH LATIN SMALL LETTER V] +"\uFF56" => "v" + +# Ꝡ [LATIN CAPITAL LETTER VY] +"\uA760" => "VY" + +# ⒱ [PARENTHESIZED LATIN SMALL LETTER V] +"\u24B1" => "(v)" + +# ꝡ [LATIN SMALL LETTER VY] +"\uA761" => "vy" + +# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] +"\u0174" => "W" + +# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN] +"\u01F7" => "W" + +# ᴡ [LATIN LETTER SMALL CAPITAL W] +"\u1D21" => "W" + +# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE] +"\u1E80" => "W" + +# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE] +"\u1E82" => "W" + +# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS] +"\u1E84" => "W" + +# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE] +"\u1E86" => "W" + +# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW] +"\u1E88" => "W" + +# Ⓦ [CIRCLED LATIN CAPITAL LETTER W] +"\u24CC" => "W" + +# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK] +"\u2C72" => "W" + +# W [FULLWIDTH LATIN CAPITAL LETTER W] +"\uFF37" => "W" + +# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX] +"\u0175" => "w" + +# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN] +"\u01BF" => "w" + +# ʍ [LATIN SMALL LETTER TURNED W] +"\u028D" => "w" + +# ẁ [LATIN SMALL LETTER W WITH GRAVE] +"\u1E81" => "w" + +# ẃ [LATIN SMALL LETTER W WITH ACUTE] +"\u1E83" => "w" + +# ẅ [LATIN SMALL LETTER W WITH DIAERESIS] +"\u1E85" => "w" + +# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE] +"\u1E87" => "w" + +# ẉ [LATIN SMALL LETTER W WITH DOT BELOW] +"\u1E89" => "w" + +# ẘ [LATIN SMALL LETTER W WITH RING ABOVE] +"\u1E98" => "w" + +# ⓦ [CIRCLED LATIN SMALL LETTER W] +"\u24E6" => "w" + +# ⱳ [LATIN SMALL LETTER W WITH HOOK] +"\u2C73" => "w" + +# w [FULLWIDTH LATIN SMALL LETTER W] +"\uFF57" => "w" + +# ⒲ [PARENTHESIZED LATIN SMALL LETTER W] +"\u24B2" => "(w)" + +# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE] +"\u1E8A" => "X" + +# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS] +"\u1E8C" => "X" + +# Ⓧ [CIRCLED LATIN CAPITAL LETTER X] +"\u24CD" => "X" + +# X [FULLWIDTH LATIN CAPITAL LETTER X] +"\uFF38" => "X" + +# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK] +"\u1D8D" => "x" + +# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE] +"\u1E8B" => "x" + +# ẍ [LATIN SMALL LETTER X WITH DIAERESIS] +"\u1E8D" => "x" + +# ₓ [LATIN SUBSCRIPT SMALL LETTER X] +"\u2093" => "x" + +# ⓧ [CIRCLED LATIN SMALL LETTER X] +"\u24E7" => "x" + +# x [FULLWIDTH LATIN SMALL LETTER X] +"\uFF58" => "x" + +# ⒳ [PARENTHESIZED LATIN SMALL LETTER X] +"\u24B3" => "(x)" + +# Ý [LATIN CAPITAL LETTER Y WITH ACUTE] +"\u00DD" => "Y" + +# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] +"\u0176" => "Y" + +# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS] +"\u0178" => "Y" + +# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK] +"\u01B3" => "Y" + +# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON] +"\u0232" => "Y" + +# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE] +"\u024E" => "Y" + +# ʏ [LATIN LETTER SMALL CAPITAL Y] +"\u028F" => "Y" + +# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE] +"\u1E8E" => "Y" + +# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE] +"\u1EF2" => "Y" + +# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW] +"\u1EF4" => "Y" + +# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] +"\u1EF6" => "Y" + +# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE] +"\u1EF8" => "Y" + +# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP] +"\u1EFE" => "Y" + +# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y] +"\u24CE" => "Y" + +# Y [FULLWIDTH LATIN CAPITAL LETTER Y] +"\uFF39" => "Y" + +# ý [LATIN SMALL LETTER Y WITH ACUTE] +"\u00FD" => "y" + +# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS] +"\u00FF" => "y" + +# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX] +"\u0177" => "y" + +# ƴ [LATIN SMALL LETTER Y WITH HOOK] +"\u01B4" => "y" + +# ȳ [LATIN SMALL LETTER Y WITH MACRON] +"\u0233" => "y" + +# ɏ [LATIN SMALL LETTER Y WITH STROKE] +"\u024F" => "y" + +# ʎ [LATIN SMALL LETTER TURNED Y] +"\u028E" => "y" + +# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE] +"\u1E8F" => "y" + +# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE] +"\u1E99" => "y" + +# ỳ [LATIN SMALL LETTER Y WITH GRAVE] +"\u1EF3" => "y" + +# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW] +"\u1EF5" => "y" + +# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE] +"\u1EF7" => "y" + +# ỹ [LATIN SMALL LETTER Y WITH TILDE] +"\u1EF9" => "y" + +# ỿ [LATIN SMALL LETTER Y WITH LOOP] +"\u1EFF" => "y" + +# ⓨ [CIRCLED LATIN SMALL LETTER Y] +"\u24E8" => "y" + +# y [FULLWIDTH LATIN SMALL LETTER Y] +"\uFF59" => "y" + +# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y] +"\u24B4" => "(y)" + +# Ź [LATIN CAPITAL LETTER Z WITH ACUTE] +"\u0179" => "Z" + +# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE] +"\u017B" => "Z" + +# Ž [LATIN CAPITAL LETTER Z WITH CARON] +"\u017D" => "Z" + +# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE] +"\u01B5" => "Z" + +# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH] +"\u021C" => "Z" + +# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK] +"\u0224" => "Z" + +# ᴢ [LATIN LETTER SMALL CAPITAL Z] +"\u1D22" => "Z" + +# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] +"\u1E90" => "Z" + +# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW] +"\u1E92" => "Z" + +# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW] +"\u1E94" => "Z" + +# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z] +"\u24CF" => "Z" + +# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER] +"\u2C6B" => "Z" + +# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z] +"\uA762" => "Z" + +# Z [FULLWIDTH LATIN CAPITAL LETTER Z] +"\uFF3A" => "Z" + +# ź [LATIN SMALL LETTER Z WITH ACUTE] +"\u017A" => "z" + +# ż [LATIN SMALL LETTER Z WITH DOT ABOVE] +"\u017C" => "z" + +# ž [LATIN SMALL LETTER Z WITH CARON] +"\u017E" => "z" + +# ƶ [LATIN SMALL LETTER Z WITH STROKE] +"\u01B6" => "z" + +# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH] +"\u021D" => "z" + +# ȥ [LATIN SMALL LETTER Z WITH HOOK] +"\u0225" => "z" + +# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL] +"\u0240" => "z" + +# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] +"\u0290" => "z" + +# ʑ [LATIN SMALL LETTER Z WITH CURL] +"\u0291" => "z" + +# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE] +"\u1D76" => "z" + +# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK] +"\u1D8E" => "z" + +# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX] +"\u1E91" => "z" + +# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW] +"\u1E93" => "z" + +# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW] +"\u1E95" => "z" + +# ⓩ [CIRCLED LATIN SMALL LETTER Z] +"\u24E9" => "z" + +# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER] +"\u2C6C" => "z" + +# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z] +"\uA763" => "z" + +# z [FULLWIDTH LATIN SMALL LETTER Z] +"\uFF5A" => "z" + +# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z] +"\u24B5" => "(z)" + +# ⁰ [SUPERSCRIPT ZERO] +"\u2070" => "0" + +# ₀ [SUBSCRIPT ZERO] +"\u2080" => "0" + +# ⓪ [CIRCLED DIGIT ZERO] +"\u24EA" => "0" + +# ⓿ [NEGATIVE CIRCLED DIGIT ZERO] +"\u24FF" => "0" + +# 0 [FULLWIDTH DIGIT ZERO] +"\uFF10" => "0" + +# ¹ [SUPERSCRIPT ONE] +"\u00B9" => "1" + +# ₁ [SUBSCRIPT ONE] +"\u2081" => "1" + +# ① [CIRCLED DIGIT ONE] +"\u2460" => "1" + +# ⓵ [DOUBLE CIRCLED DIGIT ONE] +"\u24F5" => "1" + +# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE] +"\u2776" => "1" + +# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] +"\u2780" => "1" + +# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] +"\u278A" => "1" + +# 1 [FULLWIDTH DIGIT ONE] +"\uFF11" => "1" + +# ⒈ [DIGIT ONE FULL STOP] +"\u2488" => "1." + +# ⑴ [PARENTHESIZED DIGIT ONE] +"\u2474" => "(1)" + +# ² [SUPERSCRIPT TWO] +"\u00B2" => "2" + +# ₂ [SUBSCRIPT TWO] +"\u2082" => "2" + +# ② [CIRCLED DIGIT TWO] +"\u2461" => "2" + +# ⓶ [DOUBLE CIRCLED DIGIT TWO] +"\u24F6" => "2" + +# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO] +"\u2777" => "2" + +# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] +"\u2781" => "2" + +# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] +"\u278B" => "2" + +# 2 [FULLWIDTH DIGIT TWO] +"\uFF12" => "2" + +# ⒉ [DIGIT TWO FULL STOP] +"\u2489" => "2." + +# ⑵ [PARENTHESIZED DIGIT TWO] +"\u2475" => "(2)" + +# ³ [SUPERSCRIPT THREE] +"\u00B3" => "3" + +# ₃ [SUBSCRIPT THREE] +"\u2083" => "3" + +# ③ [CIRCLED DIGIT THREE] +"\u2462" => "3" + +# ⓷ [DOUBLE CIRCLED DIGIT THREE] +"\u24F7" => "3" + +# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE] +"\u2778" => "3" + +# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] +"\u2782" => "3" + +# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] +"\u278C" => "3" + +# 3 [FULLWIDTH DIGIT THREE] +"\uFF13" => "3" + +# ⒊ [DIGIT THREE FULL STOP] +"\u248A" => "3." + +# ⑶ [PARENTHESIZED DIGIT THREE] +"\u2476" => "(3)" + +# ⁴ [SUPERSCRIPT FOUR] +"\u2074" => "4" + +# ₄ [SUBSCRIPT FOUR] +"\u2084" => "4" + +# ④ [CIRCLED DIGIT FOUR] +"\u2463" => "4" + +# ⓸ [DOUBLE CIRCLED DIGIT FOUR] +"\u24F8" => "4" + +# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] +"\u2779" => "4" + +# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] +"\u2783" => "4" + +# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] +"\u278D" => "4" + +# 4 [FULLWIDTH DIGIT FOUR] +"\uFF14" => "4" + +# ⒋ [DIGIT FOUR FULL STOP] +"\u248B" => "4." + +# ⑷ [PARENTHESIZED DIGIT FOUR] +"\u2477" => "(4)" + +# ⁵ [SUPERSCRIPT FIVE] +"\u2075" => "5" + +# ₅ [SUBSCRIPT FIVE] +"\u2085" => "5" + +# ⑤ [CIRCLED DIGIT FIVE] +"\u2464" => "5" + +# ⓹ [DOUBLE CIRCLED DIGIT FIVE] +"\u24F9" => "5" + +# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] +"\u277A" => "5" + +# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] +"\u2784" => "5" + +# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] +"\u278E" => "5" + +# 5 [FULLWIDTH DIGIT FIVE] +"\uFF15" => "5" + +# ⒌ [DIGIT FIVE FULL STOP] +"\u248C" => "5." + +# ⑸ [PARENTHESIZED DIGIT FIVE] +"\u2478" => "(5)" + +# ⁶ [SUPERSCRIPT SIX] +"\u2076" => "6" + +# ₆ [SUBSCRIPT SIX] +"\u2086" => "6" + +# ⑥ [CIRCLED DIGIT SIX] +"\u2465" => "6" + +# ⓺ [DOUBLE CIRCLED DIGIT SIX] +"\u24FA" => "6" + +# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX] +"\u277B" => "6" + +# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] +"\u2785" => "6" + +# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] +"\u278F" => "6" + +# 6 [FULLWIDTH DIGIT SIX] +"\uFF16" => "6" + +# ⒍ [DIGIT SIX FULL STOP] +"\u248D" => "6." + +# ⑹ [PARENTHESIZED DIGIT SIX] +"\u2479" => "(6)" + +# ⁷ [SUPERSCRIPT SEVEN] +"\u2077" => "7" + +# ₇ [SUBSCRIPT SEVEN] +"\u2087" => "7" + +# ⑦ [CIRCLED DIGIT SEVEN] +"\u2466" => "7" + +# ⓻ [DOUBLE CIRCLED DIGIT SEVEN] +"\u24FB" => "7" + +# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] +"\u277C" => "7" + +# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] +"\u2786" => "7" + +# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] +"\u2790" => "7" + +# 7 [FULLWIDTH DIGIT SEVEN] +"\uFF17" => "7" + +# ⒎ [DIGIT SEVEN FULL STOP] +"\u248E" => "7." + +# ⑺ [PARENTHESIZED DIGIT SEVEN] +"\u247A" => "(7)" + +# ⁸ [SUPERSCRIPT EIGHT] +"\u2078" => "8" + +# ₈ [SUBSCRIPT EIGHT] +"\u2088" => "8" + +# ⑧ [CIRCLED DIGIT EIGHT] +"\u2467" => "8" + +# ⓼ [DOUBLE CIRCLED DIGIT EIGHT] +"\u24FC" => "8" + +# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] +"\u277D" => "8" + +# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] +"\u2787" => "8" + +# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] +"\u2791" => "8" + +# 8 [FULLWIDTH DIGIT EIGHT] +"\uFF18" => "8" + +# ⒏ [DIGIT EIGHT FULL STOP] +"\u248F" => "8." + +# ⑻ [PARENTHESIZED DIGIT EIGHT] +"\u247B" => "(8)" + +# ⁹ [SUPERSCRIPT NINE] +"\u2079" => "9" + +# ₉ [SUBSCRIPT NINE] +"\u2089" => "9" + +# ⑨ [CIRCLED DIGIT NINE] +"\u2468" => "9" + +# ⓽ [DOUBLE CIRCLED DIGIT NINE] +"\u24FD" => "9" + +# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE] +"\u277E" => "9" + +# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] +"\u2788" => "9" + +# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] +"\u2792" => "9" + +# 9 [FULLWIDTH DIGIT NINE] +"\uFF19" => "9" + +# ⒐ [DIGIT NINE FULL STOP] +"\u2490" => "9." + +# ⑼ [PARENTHESIZED DIGIT NINE] +"\u247C" => "(9)" + +# ⑩ [CIRCLED NUMBER TEN] +"\u2469" => "10" + +# ⓾ [DOUBLE CIRCLED NUMBER TEN] +"\u24FE" => "10" + +# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN] +"\u277F" => "10" + +# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] +"\u2789" => "10" + +# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] +"\u2793" => "10" + +# ⒑ [NUMBER TEN FULL STOP] +"\u2491" => "10." + +# ⑽ [PARENTHESIZED NUMBER TEN] +"\u247D" => "(10)" + +# ⑪ [CIRCLED NUMBER ELEVEN] +"\u246A" => "11" + +# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN] +"\u24EB" => "11" + +# ⒒ [NUMBER ELEVEN FULL STOP] +"\u2492" => "11." + +# ⑾ [PARENTHESIZED NUMBER ELEVEN] +"\u247E" => "(11)" + +# ⑫ [CIRCLED NUMBER TWELVE] +"\u246B" => "12" + +# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE] +"\u24EC" => "12" + +# ⒓ [NUMBER TWELVE FULL STOP] +"\u2493" => "12." + +# ⑿ [PARENTHESIZED NUMBER TWELVE] +"\u247F" => "(12)" + +# ⑬ [CIRCLED NUMBER THIRTEEN] +"\u246C" => "13" + +# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN] +"\u24ED" => "13" + +# ⒔ [NUMBER THIRTEEN FULL STOP] +"\u2494" => "13." + +# ⒀ [PARENTHESIZED NUMBER THIRTEEN] +"\u2480" => "(13)" + +# ⑭ [CIRCLED NUMBER FOURTEEN] +"\u246D" => "14" + +# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN] +"\u24EE" => "14" + +# ⒕ [NUMBER FOURTEEN FULL STOP] +"\u2495" => "14." + +# ⒁ [PARENTHESIZED NUMBER FOURTEEN] +"\u2481" => "(14)" + +# ⑮ [CIRCLED NUMBER FIFTEEN] +"\u246E" => "15" + +# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN] +"\u24EF" => "15" + +# ⒖ [NUMBER FIFTEEN FULL STOP] +"\u2496" => "15." + +# ⒂ [PARENTHESIZED NUMBER FIFTEEN] +"\u2482" => "(15)" + +# ⑯ [CIRCLED NUMBER SIXTEEN] +"\u246F" => "16" + +# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN] +"\u24F0" => "16" + +# ⒗ [NUMBER SIXTEEN FULL STOP] +"\u2497" => "16." + +# ⒃ [PARENTHESIZED NUMBER SIXTEEN] +"\u2483" => "(16)" + +# ⑰ [CIRCLED NUMBER SEVENTEEN] +"\u2470" => "17" + +# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN] +"\u24F1" => "17" + +# ⒘ [NUMBER SEVENTEEN FULL STOP] +"\u2498" => "17." + +# ⒄ [PARENTHESIZED NUMBER SEVENTEEN] +"\u2484" => "(17)" + +# ⑱ [CIRCLED NUMBER EIGHTEEN] +"\u2471" => "18" + +# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN] +"\u24F2" => "18" + +# ⒙ [NUMBER EIGHTEEN FULL STOP] +"\u2499" => "18." + +# ⒅ [PARENTHESIZED NUMBER EIGHTEEN] +"\u2485" => "(18)" + +# ⑲ [CIRCLED NUMBER NINETEEN] +"\u2472" => "19" + +# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN] +"\u24F3" => "19" + +# ⒚ [NUMBER NINETEEN FULL STOP] +"\u249A" => "19." + +# ⒆ [PARENTHESIZED NUMBER NINETEEN] +"\u2486" => "(19)" + +# ⑳ [CIRCLED NUMBER TWENTY] +"\u2473" => "20" + +# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY] +"\u24F4" => "20" + +# ⒛ [NUMBER TWENTY FULL STOP] +"\u249B" => "20." + +# ⒇ [PARENTHESIZED NUMBER TWENTY] +"\u2487" => "(20)" + +# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] +"\u00AB" => "\"" + +# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] +"\u00BB" => "\"" + +# “ [LEFT DOUBLE QUOTATION MARK] +"\u201C" => "\"" + +# ” [RIGHT DOUBLE QUOTATION MARK] +"\u201D" => "\"" + +# „ [DOUBLE LOW-9 QUOTATION MARK] +"\u201E" => "\"" + +# ″ [DOUBLE PRIME] +"\u2033" => "\"" + +# ‶ [REVERSED DOUBLE PRIME] +"\u2036" => "\"" + +# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] +"\u275D" => "\"" + +# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] +"\u275E" => "\"" + +# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] +"\u276E" => "\"" + +# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] +"\u276F" => "\"" + +# " [FULLWIDTH QUOTATION MARK] +"\uFF02" => "\"" + +# ‘ [LEFT SINGLE QUOTATION MARK] +"\u2018" => "\'" + +# ’ [RIGHT SINGLE QUOTATION MARK] +"\u2019" => "\'" + +# ‚ [SINGLE LOW-9 QUOTATION MARK] +"\u201A" => "\'" + +# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK] +"\u201B" => "\'" + +# ′ [PRIME] +"\u2032" => "\'" + +# ‵ [REVERSED PRIME] +"\u2035" => "\'" + +# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] +"\u2039" => "\'" + +# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] +"\u203A" => "\'" + +# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] +"\u275B" => "\'" + +# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] +"\u275C" => "\'" + +# ' [FULLWIDTH APOSTROPHE] +"\uFF07" => "\'" + +# ‐ [HYPHEN] +"\u2010" => "-" + +# ‑ [NON-BREAKING HYPHEN] +"\u2011" => "-" + +# ‒ [FIGURE DASH] +"\u2012" => "-" + +# – [EN DASH] +"\u2013" => "-" + +# — [EM DASH] +"\u2014" => "-" + +# ⁻ [SUPERSCRIPT MINUS] +"\u207B" => "-" + +# ₋ [SUBSCRIPT MINUS] +"\u208B" => "-" + +# - [FULLWIDTH HYPHEN-MINUS] +"\uFF0D" => "-" + +# ⁅ [LEFT SQUARE BRACKET WITH QUILL] +"\u2045" => "[" + +# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] +"\u2772" => "[" + +# [ [FULLWIDTH LEFT SQUARE BRACKET] +"\uFF3B" => "[" + +# ⁆ [RIGHT SQUARE BRACKET WITH QUILL] +"\u2046" => "]" + +# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] +"\u2773" => "]" + +# ] [FULLWIDTH RIGHT SQUARE BRACKET] +"\uFF3D" => "]" + +# ⁽ [SUPERSCRIPT LEFT PARENTHESIS] +"\u207D" => "(" + +# ₍ [SUBSCRIPT LEFT PARENTHESIS] +"\u208D" => "(" + +# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT] +"\u2768" => "(" + +# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] +"\u276A" => "(" + +# ( [FULLWIDTH LEFT PARENTHESIS] +"\uFF08" => "(" + +# ⸨ [LEFT DOUBLE PARENTHESIS] +"\u2E28" => "((" + +# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS] +"\u207E" => ")" + +# ₎ [SUBSCRIPT RIGHT PARENTHESIS] +"\u208E" => ")" + +# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT] +"\u2769" => ")" + +# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] +"\u276B" => ")" + +# ) [FULLWIDTH RIGHT PARENTHESIS] +"\uFF09" => ")" + +# ⸩ [RIGHT DOUBLE PARENTHESIS] +"\u2E29" => "))" + +# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] +"\u276C" => "<" + +# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] +"\u2770" => "<" + +# < [FULLWIDTH LESS-THAN SIGN] +"\uFF1C" => "<" + +# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] +"\u276D" => ">" + +# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] +"\u2771" => ">" + +# > [FULLWIDTH GREATER-THAN SIGN] +"\uFF1E" => ">" + +# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT] +"\u2774" => "{" + +# { [FULLWIDTH LEFT CURLY BRACKET] +"\uFF5B" => "{" + +# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT] +"\u2775" => "}" + +# } [FULLWIDTH RIGHT CURLY BRACKET] +"\uFF5D" => "}" + +# ⁺ [SUPERSCRIPT PLUS SIGN] +"\u207A" => "+" + +# ₊ [SUBSCRIPT PLUS SIGN] +"\u208A" => "+" + +# + [FULLWIDTH PLUS SIGN] +"\uFF0B" => "+" + +# ⁼ [SUPERSCRIPT EQUALS SIGN] +"\u207C" => "=" + +# ₌ [SUBSCRIPT EQUALS SIGN] +"\u208C" => "=" + +# = [FULLWIDTH EQUALS SIGN] +"\uFF1D" => "=" + +# ! [FULLWIDTH EXCLAMATION MARK] +"\uFF01" => "!" + +# ‼ [DOUBLE EXCLAMATION MARK] +"\u203C" => "!!" + +# ⁉ [EXCLAMATION QUESTION MARK] +"\u2049" => "!?" + +# # [FULLWIDTH NUMBER SIGN] +"\uFF03" => "#" + +# $ [FULLWIDTH DOLLAR SIGN] +"\uFF04" => "$" + +# ⁒ [COMMERCIAL MINUS SIGN] +"\u2052" => "%" + +# % [FULLWIDTH PERCENT SIGN] +"\uFF05" => "%" + +# & [FULLWIDTH AMPERSAND] +"\uFF06" => "&" + +# ⁎ [LOW ASTERISK] +"\u204E" => "*" + +# * [FULLWIDTH ASTERISK] +"\uFF0A" => "*" + +# , [FULLWIDTH COMMA] +"\uFF0C" => "," + +# . [FULLWIDTH FULL STOP] +"\uFF0E" => "." + +# ⁄ [FRACTION SLASH] +"\u2044" => "/" + +# / [FULLWIDTH SOLIDUS] +"\uFF0F" => "/" + +# : [FULLWIDTH COLON] +"\uFF1A" => ":" + +# ⁏ [REVERSED SEMICOLON] +"\u204F" => ";" + +# ; [FULLWIDTH SEMICOLON] +"\uFF1B" => ";" + +# ? [FULLWIDTH QUESTION MARK] +"\uFF1F" => "?" + +# ⁇ [DOUBLE QUESTION MARK] +"\u2047" => "??" + +# ⁈ [QUESTION EXCLAMATION MARK] +"\u2048" => "?!" + +# @ [FULLWIDTH COMMERCIAL AT] +"\uFF20" => "@" + +# \ [FULLWIDTH REVERSE SOLIDUS] +"\uFF3C" => "\\" + +# ‸ [CARET] +"\u2038" => "^" + +# ^ [FULLWIDTH CIRCUMFLEX ACCENT] +"\uFF3E" => "^" + +# _ [FULLWIDTH LOW LINE] +"\uFF3F" => "_" + +# ⁓ [SWUNG DASH] +"\u2053" => "~" + +# ~ [FULLWIDTH TILDE] +"\uFF5E" => "~" + +################################################################ +# Below is the Perl script used to generate the above mappings # +# from ASCIIFoldingFilter.java: # +################################################################ +# +# #!/usr/bin/perl +# +# use warnings; +# use strict; +# +# my @source_chars = (); +# my @source_char_descriptions = (); +# my $target = ''; +# +# while (<>) { +# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) { +# push @source_chars, $1; +# push @source_char_descriptions, $2; +# next; +# } +# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) { +# $target .= $1; +# next; +# } +# if (/break;/) { +# $target = "\\\"" if ($target eq '"'); +# for my $source_char_num (0..$#source_chars) { +# print "# $source_char_descriptions[$source_char_num]\n"; +# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n"; +# } +# @source_chars = (); +# @source_char_descriptions = (); +# $target = ''; +# } +# } diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/mapping-ISOLatin1Accent.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/mapping-ISOLatin1Accent.txt new file mode 100644 index 000000000000..ede7742581be --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/mapping-ISOLatin1Accent.txt @@ -0,0 +1,246 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Syntax: +# "source" => "target" +# "source".length() > 0 (source cannot be empty.) +# "target".length() >= 0 (target can be empty.) + +# example: +# "À" => "A" +# "\u00C0" => "A" +# "\u00C0" => "\u0041" +# "ß" => "ss" +# "\t" => " " +# "\n" => "" + +# À => A +"\u00C0" => "A" + +# Á => A +"\u00C1" => "A" + +#  => A +"\u00C2" => "A" + +# à => A +"\u00C3" => "A" + +# Ä => A +"\u00C4" => "A" + +# Å => A +"\u00C5" => "A" + +# Æ => AE +"\u00C6" => "AE" + +# Ç => C +"\u00C7" => "C" + +# È => E +"\u00C8" => "E" + +# É => E +"\u00C9" => "E" + +# Ê => E +"\u00CA" => "E" + +# Ë => E +"\u00CB" => "E" + +# Ì => I +"\u00CC" => "I" + +# Í => I +"\u00CD" => "I" + +# Î => I +"\u00CE" => "I" + +# Ï => I +"\u00CF" => "I" + +# IJ => IJ +"\u0132" => "IJ" + +# Ð => D +"\u00D0" => "D" + +# Ñ => N +"\u00D1" => "N" + +# Ò => O +"\u00D2" => "O" + +# Ó => O +"\u00D3" => "O" + +# Ô => O +"\u00D4" => "O" + +# Õ => O +"\u00D5" => "O" + +# Ö => O +"\u00D6" => "O" + +# Ø => O +"\u00D8" => "O" + +# Œ => OE +"\u0152" => "OE" + +# Þ +"\u00DE" => "TH" + +# Ù => U +"\u00D9" => "U" + +# Ú => U +"\u00DA" => "U" + +# Û => U +"\u00DB" => "U" + +# Ü => U +"\u00DC" => "U" + +# Ý => Y +"\u00DD" => "Y" + +# Ÿ => Y +"\u0178" => "Y" + +# à => a +"\u00E0" => "a" + +# á => a +"\u00E1" => "a" + +# â => a +"\u00E2" => "a" + +# ã => a +"\u00E3" => "a" + +# ä => a +"\u00E4" => "a" + +# å => a +"\u00E5" => "a" + +# æ => ae +"\u00E6" => "ae" + +# ç => c +"\u00E7" => "c" + +# è => e +"\u00E8" => "e" + +# é => e +"\u00E9" => "e" + +# ê => e +"\u00EA" => "e" + +# ë => e +"\u00EB" => "e" + +# ì => i +"\u00EC" => "i" + +# í => i +"\u00ED" => "i" + +# î => i +"\u00EE" => "i" + +# ï => i +"\u00EF" => "i" + +# ij => ij +"\u0133" => "ij" + +# ð => d +"\u00F0" => "d" + +# ñ => n +"\u00F1" => "n" + +# ò => o +"\u00F2" => "o" + +# ó => o +"\u00F3" => "o" + +# ô => o +"\u00F4" => "o" + +# õ => o +"\u00F5" => "o" + +# ö => o +"\u00F6" => "o" + +# ø => o +"\u00F8" => "o" + +# œ => oe +"\u0153" => "oe" + +# ß => ss +"\u00DF" => "ss" + +# þ => th +"\u00FE" => "th" + +# ù => u +"\u00F9" => "u" + +# ú => u +"\u00FA" => "u" + +# û => u +"\u00FB" => "u" + +# ü => u +"\u00FC" => "u" + +# ý => y +"\u00FD" => "y" + +# ÿ => y +"\u00FF" => "y" + +# ff => ff +"\uFB00" => "ff" + +# fi => fi +"\uFB01" => "fi" + +# fl => fl +"\uFB02" => "fl" + +# ffi => ffi +"\uFB03" => "ffi" + +# ffl => ffl +"\uFB04" => "ffl" + +# ſt => ft +"\uFB05" => "ft" + +# st => st +"\uFB06" => "st" diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/params.json b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/params.json new file mode 100644 index 000000000000..ac72676a78ac --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/params.json @@ -0,0 +1,11 @@ +{ + "params": { + "_UPDATE_JSON_DOCS": { + "srcField": "_src_", + "mapUniqueKeyOnly": true, + "": { + "v": 0 + } + } + } +} \ No newline at end of file diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/protwords.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/protwords.txt new file mode 100644 index 000000000000..1dfc0abecbf8 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/solrconfig.xml b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/solrconfig.xml new file mode 100644 index 000000000000..59d056918267 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/solrconfig.xml @@ -0,0 +1,1423 @@ + + + + + + + + + 9.0.0 + + + + + + + + + + + + + + + + + + + + + + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ${solr.lock.type:native} + + + + + + + + + + + + + true + + + + + + + + + + + + + + + + ${solr.ulog.dir:} + ${solr.ulog.numVersionBuckets:65536} + + + + + ${solr.autoCommit.maxTime:15000} + false + + + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + + + + + + + + ${solr.max.booleanClauses:1024} + + + + -1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + + + + + + + + + + + explicit + json + true + text + + + + + + text + + + + + + + + + true + ignored_ + + + true + links + ignored_ + + + + + + 5 + + + + + + text_general + + + + + + default + text + solr.DirectSolrSpellChecker + + internal + + 0.5 + + 2 + + 1 + + 5 + + 4 + + 0.01 + + + + + + wordbreak + solr.WordBreakSolrSpellChecker + name + true + true + 10 + + + + + + + + + + + + + + + + + default + wordbreak + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + spellcheck + + + + + + + mySuggester + FuzzyLookupFactory + DocumentDictionaryFactory + cat + price + string + false + + + + + + true + 10 + + + suggest + + + + + + + + + + + true + + + tvComponent + + + + + + + + + lingo3g + true + Lingo3G + name, features + true + English + + + + lingo + Lingo + name, features + true + English + + + + stc + STC + name, features + true + English + + + + kmeans + Bisecting K-Means + name, features + true + English + + + + + + + + true + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 100 + *,score + + + + + clustering + + + + + + + + string + elevate.xml + + + + + + explicit + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + 10 + .,!? + + + + + + + WORD + + + en + US + + + + + + + + + + + + + + + + + + + + + + 5 + + + + + + + + + + + + + + + + + + + QUERY_DOC_FV + + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/spellings.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/spellings.txt new file mode 100644 index 000000000000..d7ede6f5611d --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/spellings.txt @@ -0,0 +1,2 @@ +pizza +history \ No newline at end of file diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/stopwords.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/stopwords.txt new file mode 100644 index 000000000000..ae1e83eeb3d4 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/synonyms.txt b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/synonyms.txt new file mode 100644 index 000000000000..eab4ee875373 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/update-script.js b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/update-script.js new file mode 100644 index 000000000000..bd361182d2cc --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/update-script.js @@ -0,0 +1,53 @@ +/* + This is a basic skeleton JavaScript update processor. + + In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in + the example solrconfig.xml and must be uncommented to be enabled. + + See https://lucene.apache.org/solr/guide/script-update-processor.html for more details. +*/ + +function processAdd(cmd) { + + doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument + id = doc.getFieldValue("id"); + logger.warn("update-script#processAdd: id=" + id); // WARN level messages will show up in Solr Admin Logging UI + +// Set a field value: +// doc.setField("foo_s", "whatever"); + +// Get a configuration parameter: +// config_param = params.get('config_param'); // "params" only exists if processor configured with + +// Get a request parameter: +// some_param = req.getParams().get("some_param") + +// Add a field of field names that match a pattern: +// - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss +// field_names = doc.getFieldNames().toArray(); +// for(i=0; i < field_names.length; i++) { +// field_name = field_names[i]; +// if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); } +// } + +} + +function processDelete(cmd) { + // no-op +} + +function processMergeIndexes(cmd) { + // no-op +} + +function processCommit(cmd) { + // no-op +} + +function processRollback(cmd) { + // no-op +} + +function finish() { + // no-op +} diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example.xsl b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example.xsl new file mode 100644 index 000000000000..b8992700828e --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example.xsl @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + <xsl:value-of select="$title"/> + + + +

+
+ This has been formatted by the sample "example.xsl" transform - + use your own XSLT to get a nicer page +
+ + + +
+ + + +
+ + + + +
+
+
+ + + + + + + + + + + + + + javascript:toggle("");? +
+ + exp + + + + + +
+ + +
+ + + + + + + +
    + +
  • +
    +
+ + +
+ + + + + + + + + + + + + + + + + + + + +
diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example_atom.xsl b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example_atom.xsl new file mode 100644 index 000000000000..b6c23151dc4a --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example_atom.xsl @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + Example Solr Atom 1.0 Feed + + This has been formatted by the sample "example_atom.xsl" transform - + use your own XSLT to get a nicer Atom feed. + + + Apache Solr + solr-user@lucene.apache.org + + + + + + tag:localhost,2007:example + + + + + + + + + <xsl:value-of select="str[@name='name']"/> + + tag:localhost,2007: + + + + + + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example_rss.xsl b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example_rss.xsl new file mode 100644 index 000000000000..c8ab5bfb1ec8 --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/example_rss.xsl @@ -0,0 +1,66 @@ + + + + + + + + + + + + + Example Solr RSS 2.0 Feed + http://localhost:8983/solr + + This has been formatted by the sample "example_rss.xsl" transform - + use your own XSLT to get a nicer RSS feed. + + en-us + http://localhost:8983/solr + + + + + + + + + + + <xsl:value-of select="str[@name='name']"/> + + http://localhost:8983/solr/select?q=id: + + + + + + + http://localhost:8983/solr/select?q=id: + + + + diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/luke.xsl b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/luke.xsl new file mode 100644 index 000000000000..05fb5bfeee2d --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/luke.xsl @@ -0,0 +1,337 @@ + + + + + + + + + Solr Luke Request Handler Response + + + + + + + + + <xsl:value-of select="$title"/> + + + + + +

+ +

+
+ +
+ +

Index Statistics

+ +
+ +

Field Statistics

+ + + +

Document statistics

+ + + + +
+ + + + + +
+ +
+ + +
+ +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + +
+

+ +

+ +
+ +
+
+
+ + +
+ + 50 + 800 + 160 + blue + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ background-color: ; width: px; height: px; +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
  • + +
  • +
    +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + + + + + - + + - + + - + + - + + - + + - + + - + + - + + - + + - + + - + + - + + - + + + + + + + + + + + + + + + + + +
diff --git a/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/updateXml.xsl b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/updateXml.xsl new file mode 100644 index 000000000000..a4a0512dd0ab --- /dev/null +++ b/solr/solrj/src/test-files/solrj/solr/testproducts/conf/xslt/updateXml.xsl @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java index 73f376e052be..2659b7ccfd24 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleXMLTest.java @@ -16,18 +16,12 @@ */ package org.apache.solr.client.solrj; -import org.apache.commons.io.FileUtils; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.client.solrj.request.RequestWriter; -import org.eclipse.jetty.servlet.ServletHolder; import org.junit.BeforeClass; -import java.io.File; -import java.util.SortedMap; -import java.util.TreeMap; - /** * A subclass of SolrExampleTests that explicitly uses the xml codec for * communication. @@ -35,10 +29,10 @@ @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") public class SolrExampleXMLTest extends SolrExampleTests { - @BeforeClass - public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); - } +// @BeforeClass +// public static void beforeTest() throws Exception { +// createAndStartJetty(testProductsCollection1SolrHome()); +// } @Override public SolrClient createNewSolrClient() { diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/TestBatchUpdate.java b/solr/solrj/src/test/org/apache/solr/client/solrj/TestBatchUpdate.java index 3e6f03d03eec..800f14d49c1a 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/TestBatchUpdate.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/TestBatchUpdate.java @@ -41,7 +41,7 @@ public class TestBatchUpdate extends SolrJettyTestBase { @BeforeClass public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); + createAndStartJetty(testProductsCollection1SolrHome()); } static final int numdocs = 1000; diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/TestSolrJErrorHandling.java b/solr/solrj/src/test/org/apache/solr/client/solrj/TestSolrJErrorHandling.java index c9e2caec6286..e7c176568586 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/TestSolrJErrorHandling.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/TestSolrJErrorHandling.java @@ -58,7 +58,7 @@ public class TestSolrJErrorHandling extends SolrJettyTestBase { @BeforeClass public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); + createAndStartJetty(testProductsCollection1SolrHome()); } @Override diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java index 3a1fc893fbc6..da606c46831b 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/JettyWebappTest.java @@ -62,7 +62,7 @@ public class JettyWebappTest extends SolrTestCaseJ4 public void setUp() throws Exception { super.setUp(); - System.setProperty("solr.solr.home", SolrJettyTestBase.legacyExampleCollection1SolrHome()); + System.setProperty("solr.solr.home", SolrJettyTestBase.testProductsCollection1SolrHome()); System.setProperty("tests.shardhandler.randomSeed", Long.toString(random().nextLong())); System.setProperty("solr.tests.doContainerStreamCloseAssert", "false"); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/LargeVolumeEmbeddedTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/LargeVolumeEmbeddedTest.java index 52c68a267d8d..06643cd0c7ad 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/LargeVolumeEmbeddedTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/LargeVolumeEmbeddedTest.java @@ -16,12 +16,16 @@ */ package org.apache.solr.client.solrj.embedded; +import org.apache.solr.SolrJettyTestBase; import org.apache.solr.client.solrj.LargeVolumeTestBase; import org.junit.BeforeClass; public class LargeVolumeEmbeddedTest extends LargeVolumeTestBase { @BeforeClass public static void beforeTest() throws Exception { - initCore(); + final String home = SolrJettyTestBase.testProductsCollection1SolrHome(); + final String config = home + "/" + DEFAULT_CORE_NAME + "/conf/solrconfig.xml"; + final String schema = home + "/" + DEFAULT_CORE_NAME + "/conf/schema.xml"; + initCore(config, schema, home); } } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/BasicHttpSolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/BasicHttpSolrClientTest.java index af7f5a4533ab..b26073f376e4 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/BasicHttpSolrClientTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/BasicHttpSolrClientTest.java @@ -201,7 +201,7 @@ public static void beforeTest() throws Exception { .withServlet(new ServletHolder(DebugServlet.class), "/debug/*") .withSSLConfig(sslConfig.buildServerSSLConfig()) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); } @Test diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClientBadInputTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClientBadInputTest.java index dcd7058b23f9..04a8a7eb7c57 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClientBadInputTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClientBadInputTest.java @@ -40,7 +40,7 @@ public static void beforeTest() throws Exception { JettyConfig jettyConfig = JettyConfig.builder() .withSSLConfig(sslConfig.buildServerSSLConfig()) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); } @Test diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClientTest.java index 36c1a068b5a8..2fdb389f35eb 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClientTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClientTest.java @@ -44,7 +44,7 @@ public static void beforeTest() throws Exception { .withServlet(new ServletHolder(ConcurrentUpdateSolrClientTest.TestServlet.class), "/cuss/*") .withSSLConfig(sslConfig.buildServerSSLConfig()) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); } @Test diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientBadInputTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientBadInputTest.java index d911a15a1bc8..5af087714169 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientBadInputTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientBadInputTest.java @@ -40,7 +40,7 @@ public static void beforeTest() throws Exception { JettyConfig jettyConfig = JettyConfig.builder() .withSSLConfig(sslConfig.buildServerSSLConfig()) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); } @Test diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientTest.java index 56a32b47eb62..49454e633412 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientTest.java @@ -131,7 +131,7 @@ public static void beforeTest() throws Exception { .withServlet(new ServletHolder(TestServlet.class), "/cuss/*") .withSSLConfig(sslConfig.buildServerSSLConfig()) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); } @Test diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/Http2SolrClientCompatibilityTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/Http2SolrClientCompatibilityTest.java index 16f687527bfe..1ea5b519d62d 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/Http2SolrClientCompatibilityTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/Http2SolrClientCompatibilityTest.java @@ -51,7 +51,7 @@ public void testConnectToOldNodesUsingHttp1() throws Exception { .withServlet(new ServletHolder(Http2SolrClientTest.DebugServlet.class), "/debug/*") .useOnlyHttp1(true) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); try (Http2SolrClient client = new Http2SolrClient.Builder(jetty.getBaseUrl().toString() + "/debug/foo") .useHttp1_1(true) @@ -71,7 +71,7 @@ public void testConnectToNewNodesUsingHttp1() throws Exception { .withServlet(new ServletHolder(Http2SolrClientTest.DebugServlet.class), "/debug/*") .useOnlyHttp1(false) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); try (Http2SolrClient client = new Http2SolrClient.Builder(jetty.getBaseUrl().toString() + "/debug/foo") .useHttp1_1(true) @@ -92,7 +92,7 @@ public void testConnectToOldNodesUsingHttp2() throws Exception { .withServlet(new ServletHolder(Http2SolrClientTest.DebugServlet.class), "/debug/*") .useOnlyHttp1(true) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); System.clearProperty("solr.http1"); try (Http2SolrClient client = new Http2SolrClient.Builder(jetty.getBaseUrl().toString() + "/debug/foo") diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/Http2SolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/Http2SolrClientTest.java index 059cd07a7e25..b610bf92ed37 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/Http2SolrClientTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/Http2SolrClientTest.java @@ -176,7 +176,7 @@ public static void beforeTest() throws Exception { .withServlet(new ServletHolder(DebugServlet.class), "/debug/*") .withSSLConfig(sslConfig.buildServerSSLConfig()) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); } @Override diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpSolrClientBadInputTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpSolrClientBadInputTest.java index 10d583db2a52..cb4b599bc148 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpSolrClientBadInputTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpSolrClientBadInputTest.java @@ -41,7 +41,7 @@ public static void beforeTest() throws Exception { JettyConfig jettyConfig = JettyConfig.builder() .withSSLConfig(sslConfig.buildServerSSLConfig()) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); } @Test diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpSolrClientConPoolTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpSolrClientConPoolTest.java index bd573e81e2d0..b45520175c08 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpSolrClientConPoolTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/HttpSolrClientConPoolTest.java @@ -46,12 +46,12 @@ public class HttpSolrClientConPoolTest extends SolrJettyTestBase { @BeforeClass public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); + createAndStartJetty(testProductsCollection1SolrHome()); // stealing the first made jetty yetty = jetty; barUrl = yetty.getBaseUrl().toString() + "/" + "collection1"; - createAndStartJetty(legacyExampleCollection1SolrHome()); + createAndStartJetty(testProductsCollection1SolrHome()); fooUrl = jetty.getBaseUrl().toString() + "/" + "collection1"; } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBHttpSolrClientBadInputTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBHttpSolrClientBadInputTest.java index 5085e6f4733b..9c1fc8dc0a87 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBHttpSolrClientBadInputTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBHttpSolrClientBadInputTest.java @@ -38,7 +38,7 @@ public static void beforeTest() throws Exception { JettyConfig jettyConfig = JettyConfig.builder() .withSSLConfig(sslConfig.buildServerSSLConfig()) .build(); - createAndStartJetty(legacyExampleCollection1SolrHome(), jettyConfig); + createAndStartJetty(testProductsCollection1SolrHome(), jettyConfig); } @Test diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/response/NoOpResponseParserTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/response/NoOpResponseParserTest.java index 26fcb13050df..5b35e12502c3 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/response/NoOpResponseParserTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/response/NoOpResponseParserTest.java @@ -55,7 +55,7 @@ private static InputStream getResponse() throws IOException { @BeforeClass public static void beforeTest() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); + createAndStartJetty(testProductsCollection1SolrHome()); } @Before diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestSuggesterResponse.java b/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestSuggesterResponse.java index ca49896bfa22..b41c519199e8 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestSuggesterResponse.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestSuggesterResponse.java @@ -43,7 +43,7 @@ public class TestSuggesterResponse extends SolrJettyTestBase { @BeforeClass public static void beforeClass() throws Exception { - createAndStartJetty(legacyExampleCollection1SolrHome()); + createAndStartJetty(testProductsCollection1SolrHome()); } @Before diff --git a/solr/solrj/src/test/org/apache/solr/common/cloud/TestZkConfigSetService.java b/solr/solrj/src/test/org/apache/solr/common/cloud/TestZkConfigSetService.java index c2c79753802d..3296d67a626d 100644 --- a/solr/solrj/src/test/org/apache/solr/common/cloud/TestZkConfigSetService.java +++ b/solr/solrj/src/test/org/apache/solr/common/cloud/TestZkConfigSetService.java @@ -213,7 +213,7 @@ protected Collection createCredentials() { @Test public void testBootstrapConf() throws IOException, KeeperException, InterruptedException { - String solrHome = SolrJettyTestBase.legacyExampleCollection1SolrHome(); + String solrHome = SolrJettyTestBase.testProductsCollection1SolrHome(); CoreContainer cc = new CoreContainer(Paths.get(solrHome), new Properties()); System.setProperty("zkHost", zkServer.getZkAddress()); diff --git a/solr/test-framework/src/java/org/apache/solr/EmbeddedSolrServerTestBase.java b/solr/test-framework/src/java/org/apache/solr/EmbeddedSolrServerTestBase.java index 5785c00ad268..4b28cf2efb4e 100644 --- a/solr/test-framework/src/java/org/apache/solr/EmbeddedSolrServerTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/EmbeddedSolrServerTestBase.java @@ -122,8 +122,11 @@ public Collection download(final String collection, final String. return result; } + // I don't like that this is a test-framework, but references + // SolrJettyTestBase.testProductsCollection1SolrHome which refers to a collection only used by solrj + // tests predominantly. Also it's confusing that this initCore overrides the initCore in SolrTestCaseJ4 public static void initCore() throws Exception { - final String home = legacyExampleCollection1SolrHome(); + final String home = SolrJettyTestBase.testProductsCollection1SolrHome(); final String config = home + "/" + DEFAULT_CORE_NAME + "/conf/solrconfig.xml"; final String schema = home + "/" + DEFAULT_CORE_NAME + "/conf/schema.xml"; initCore(config, schema, home); diff --git a/solr/test-framework/src/java/org/apache/solr/SolrJettyTestBase.java b/solr/test-framework/src/java/org/apache/solr/SolrJettyTestBase.java index 538ee511212b..b2dc7a058726 100644 --- a/solr/test-framework/src/java/org/apache/solr/SolrJettyTestBase.java +++ b/solr/test-framework/src/java/org/apache/solr/SolrJettyTestBase.java @@ -167,12 +167,13 @@ public static void cleanUpJettyHome(File solrHome) throws Exception { } } - public static String legacyExampleCollection1SolrHome() { + //public static String legacyExampleCollection1SolrHome() { // Let us pick which location we are pointing to, temporary for now!!! //return legacyTechProductsCollection1SolrHome(); - return testProductsCollection1SolrHome(); - } + //return testProductsCollection1SolrHome(); + //} + // I feel like this method should live not on this class, but in a class in the solr/solrj project. public static String testProductsCollection1SolrHome(){ File tmpSolrHome = createTempDir().toFile(); try { @@ -206,47 +207,46 @@ public static String testProductsCollection1SolrHome(){ return tmpSolrHome.getAbsolutePath(); } - public static String legacyTechProductsCollection1SolrHome() { - - - - String sourceHome = ExternalPaths.SOURCE_HOME; - if (sourceHome == null) - throw new IllegalStateException("No source home! Cannot create the legacy example solr home directory."); - - String legacyExampleSolrHome = null; - try { - File tempSolrHome = LuceneTestCase.createTempDir().toFile(); - org.apache.commons.io.FileUtils.copyFileToDirectory(new File(sourceHome, "server/solr/solr.xml"), tempSolrHome); - File collection1Dir = new File(tempSolrHome, "collection1"); - org.apache.commons.io.FileUtils.forceMkdir(collection1Dir); - - File configSetDir = new File(sourceHome, "server/solr/configsets/sample_techproducts_configs/conf"); - org.apache.commons.io.FileUtils.copyDirectoryToDirectory(configSetDir, collection1Dir); - Properties props = new Properties(); - props.setProperty("name", "collection1"); - OutputStreamWriter writer = null; - try { - writer = new OutputStreamWriter(FileUtils.openOutputStream( - new File(collection1Dir, "core.properties")), StandardCharsets.UTF_8); - props.store(writer, null); - } finally { - if (writer != null) { - try { - writer.close(); - } catch (Exception ignore){} - } - } - legacyExampleSolrHome = tempSolrHome.getAbsolutePath(); - } catch (Exception exc) { - if (exc instanceof RuntimeException) { - throw (RuntimeException)exc; - } else { - throw new RuntimeException(exc); - } - } - - return legacyExampleSolrHome; - } +// public static String legacyTechProductsCollection1SolrHome() { +// +// +// String sourceHome = ExternalPaths.SOURCE_HOME; +// if (sourceHome == null) +// throw new IllegalStateException("No source home! Cannot create the legacy example solr home directory."); +// +// String legacyExampleSolrHome = null; +// try { +// File tempSolrHome = LuceneTestCase.createTempDir().toFile(); +// org.apache.commons.io.FileUtils.copyFileToDirectory(new File(sourceHome, "server/solr/solr.xml"), tempSolrHome); +// File collection1Dir = new File(tempSolrHome, "collection1"); +// org.apache.commons.io.FileUtils.forceMkdir(collection1Dir); +// +// File configSetDir = new File(sourceHome, "server/solr/configsets/sample_techproducts_configs/conf"); +// org.apache.commons.io.FileUtils.copyDirectoryToDirectory(configSetDir, collection1Dir); +// Properties props = new Properties(); +// props.setProperty("name", "collection1"); +// OutputStreamWriter writer = null; +// try { +// writer = new OutputStreamWriter(FileUtils.openOutputStream( +// new File(collection1Dir, "core.properties")), StandardCharsets.UTF_8); +// props.store(writer, null); +// } finally { +// if (writer != null) { +// try { +// writer.close(); +// } catch (Exception ignore){} +// } +// } +// legacyExampleSolrHome = tempSolrHome.getAbsolutePath(); +// } catch (Exception exc) { +// if (exc instanceof RuntimeException) { +// throw (RuntimeException)exc; +// } else { +// throw new RuntimeException(exc); +// } +// } +// +// return legacyExampleSolrHome; +// } }